Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mobvista-dmp
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
王金锋
mobvista-dmp
Commits
12a03fcf
Commit
12a03fcf
authored
Dec 30, 2021
by
WangJinfeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix tracking etl job
parent
03a77d82
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
52 additions
and
25 deletions
+52
-25
adn_request_id_mapping.sh
azkaban/id_mapping/adn_request_id_mapping.sh
+2
-2
dsp_id_mapping.sh
azkaban/id_mapping/dsp_id_mapping.sh
+2
-2
AdnRequest.scala
...scala/mobvista/dmp/datasource/id_mapping/AdnRequest.scala
+12
-3
Constant.scala
...n/scala/mobvista/dmp/datasource/id_mapping/Constant.scala
+21
-11
DspReq.scala
...ain/scala/mobvista/dmp/datasource/id_mapping/DspReq.scala
+2
-1
EtlDeviceIdDaily.scala
...mobvista/dmp/datasource/id_mapping/EtlDeviceIdDaily.scala
+13
-6
No files found.
azkaban/id_mapping/adn_request_id_mapping.sh
View file @
12a03fcf
...
...
@@ -28,6 +28,6 @@ if [ $? -ne 0 ]; then
exit
255
fi
common_mount_partition
"dwd"
"dwd_device_ios_ids_inc_daily"
"dt='
${
LOG_TIME
}
',active_type='
${
BUSINESS
}
'"
"
${
OUTPUT_PATH
}
/ios"
#
common_mount_partition "dwd" "dwd_device_ios_ids_inc_daily" "dt='${LOG_TIME}',active_type='${BUSINESS}'" "${OUTPUT_PATH}/ios"
common_mount_partition
"dwd"
"dwd_device_android_ids_inc_daily"
"dt='
${
LOG_TIME
}
',active_type='
${
BUSINESS
}
'"
"
${
OUTPUT_PATH
}
/android"
#
common_mount_partition "dwd" "dwd_device_android_ids_inc_daily" "dt='${LOG_TIME}',active_type='${BUSINESS}'" "${OUTPUT_PATH}/android"
azkaban/id_mapping/dsp_id_mapping.sh
View file @
12a03fcf
...
...
@@ -28,6 +28,6 @@ if [ $? -ne 0 ]; then
exit
255
fi
common_mount_partition
"dwd"
"dwd_device_ios_ids_inc_daily"
"dt='
${
LOG_TIME
}
',active_type='
${
BUSINESS
}
'"
"
${
OUTPUT_PATH
}
/ios"
#
common_mount_partition "dwd" "dwd_device_ios_ids_inc_daily" "dt='${LOG_TIME}',active_type='${BUSINESS}'" "${OUTPUT_PATH}/ios"
common_mount_partition
"dwd"
"dwd_device_android_ids_inc_daily"
"dt='
${
LOG_TIME
}
',active_type='
${
BUSINESS
}
'"
"
${
OUTPUT_PATH
}
/android"
#
common_mount_partition "dwd" "dwd_device_android_ids_inc_daily" "dt='${LOG_TIME}',active_type='${BUSINESS}'" "${OUTPUT_PATH}/android"
src/main/scala/mobvista/dmp/datasource/id_mapping/AdnRequest.scala
View file @
12a03fcf
...
...
@@ -17,11 +17,19 @@ class AdnRequest extends EtlDeviceIdDaily {
override
def
processData
(
date
:
String
,
i
:
Int
,
spark
:
SparkSession
)
:
RDD
[(
String
,
Row
)]
=
{
spark
.
udf
.
register
(
"getDevId"
,
getDevId
_
)
spark
.
udf
.
register
(
"parseUA"
,
parseUA
_
)
val
year
=
date
.
substring
(
0
,
4
)
val
month
=
date
.
substring
(
4
,
6
)
val
day
=
date
.
substring
(
6
,
8
)
// DWD
// val sql = Constant.adn_request_sql.replace("@date", date)
// ODS
val
sql
=
Constant
.
adn_request_sql_v3
.
replace
(
"@date"
,
date
)
val
rdd
=
spark
.
sql
(
sql
).
coalesce
(
5000
).
rdd
.
map
(
row
=>
{
val
sql
=
Constant
.
adn_request_sql_v3
.
replace
(
"@year"
,
year
)
.
replace
(
"@month"
,
month
)
.
replace
(
"@day"
,
day
)
val
rdd
=
spark
.
sql
(
sql
).
coalesce
(
20000
).
rdd
.
map
(
row
=>
{
val
gaid
=
row
.
getAs
[
String
](
"gaid"
)
val
idfa
=
row
.
getAs
[
String
](
"idfa"
)
val
imei
=
row
.
getAs
[
String
](
"imei"
)
...
...
@@ -40,6 +48,7 @@ class AdnRequest extends EtlDeviceIdDaily {
val
osv_upt
=
row
.
getAs
[
String
](
"osv_upt"
)
val
upt
=
row
.
getAs
[
String
](
"upt"
)
val
network_type
=
row
.
getAs
[
String
](
"network_type"
)
val
cnt
=
row
.
getAs
[
Long
](
"cnt"
)
var
sysId
=
""
var
bkupId
=
""
if
(
StringUtils
.
isNotBlank
(
extSysid
))
{
...
...
@@ -56,7 +65,7 @@ class AdnRequest extends EtlDeviceIdDaily {
}
}
process
(
idfa
,
idfv
,
pkg_name
,
imei
,
androidId
,
oaid
,
gaid
,
sysId
,
bkupId
,
country
,
ip
,
ua
,
brand
,
model
,
os_version
,
osv_upt
,
upt
,
network_type
,
platform
)
osv_upt
,
upt
,
network_type
,
platform
,
cnt
)
})
rdd
}
...
...
src/main/scala/mobvista/dmp/datasource/id_mapping/Constant.scala
View file @
12a03fcf
...
...
@@ -70,7 +70,8 @@ object Constant {
StructField
(
"osv_upt"
,
StringType
),
// 操作系统更新时间
StructField
(
"upt"
,
StringType
),
// 开机时间
StructField
(
"event_type"
,
StringType
),
// 事件类型
StructField
(
"network_type"
,
StringType
)
// 网络类型,用于判断API/SDK
StructField
(
"network_type"
,
StringType
),
// 网络类型,用于判断API/SDK
StructField
(
"cnt"
,
LongType
)
// 事件频次
))
/**
...
...
@@ -148,7 +149,8 @@ object Constant {
StructField
(
"osv_upt"
,
StringType
),
// 操作系统更新时间
StructField
(
"upt"
,
StringType
),
// 开机时间
StructField
(
"event_type"
,
StringType
),
// 事件类型
StructField
(
"network_type"
,
StringType
)
// 网络类型,用于判断API/SDK
StructField
(
"network_type"
,
StringType
),
// 网络类型,用于判断API/SDK
StructField
(
"cnt"
,
LongType
)
// 事件频次
))
val
otherSchema
:
StructType
=
StructType
(
Array
(
...
...
@@ -172,7 +174,9 @@ object Constant {
StructField
(
"osv_upt"
,
StringType
),
// 操作系统更新时间
StructField
(
"upt"
,
StringType
),
// 开机时间
StructField
(
"event_type"
,
StringType
),
// 事件类型
StructField
(
"network_type"
,
StringType
)
// 网络类型,用于判断API/SDK
StructField
(
"network_type"
,
StringType
),
// 网络类型,用于判断API/SDK
StructField
(
"cnt"
,
LongType
)
// 事件频次
))
val
dsp_req_sql
=
...
...
@@ -186,9 +190,11 @@ object Constant {
"""
|SELECT idfa, googleadid gaid, ext5 exitid, os platform, countrycode country, deviceip ip,
| parseUA(ext6) ua, make brand, model, osv os_version, '0' osv_upt, '0' upt, cpackagename pkg_name,
| cncttype network_type
| cncttype network_type
, COUNT(1) cnt
| FROM adn_dsp.log_adn_dsp_request_orc_hour
| WHERE CONCAT(yr,mt,dt) = '@date' @hour
| GROUP BY idfa, googleadid, ext5, os, countrycode, deviceip,
| parseUA(ext6), make, model, osv, cpackagename, cncttype
|"""
.
stripMargin
val
adn_request_sql
=
...
...
@@ -215,9 +221,12 @@ object Constant {
"""
|SELECT idfa, gaid, ext_oaid oaid, getDevId(cdn_ab) idfv, ext_packagename pkg_name, dev_id androidid, ext_sysid extsysid, imei, platform,
| country_code country, ip, parseUA(ext_systemuseragent) ua, ext_brand brand, ext_model model, os_version,
| COALESCE(get_json_object(ext_data,'$.osvut'),0) osv_upt, COALESCE(get_json_object(ext_data,'$.upt'),0) upt, network_type
| COALESCE(get_json_object(ext_data,'$.osvut'),0) osv_upt, COALESCE(get_json_object(ext_data,'$.upt'),0) upt, network_type
, COUNT(1) cnt
| FROM dwh.ods_adn_trackingnew_request
| WHERE CONCAT(yyyy,mm,dd) = '@date'
| WHERE yyyy = '@year' AND mm = '@month' AND dd = '@day'
| GROUP BY idfa, gaid, ext_oaid, getDevId(cdn_ab), ext_packagename, dev_id, ext_sysid, imei, platform,
| country_code, ip, parseUA(ext_systemuseragent), ext_brand, ext_model, os_version,
| COALESCE(get_json_object(ext_data,'$.osvut'),0), COALESCE(get_json_object(ext_data,'$.upt'),0), network_type
|"""
.
stripMargin
val
ios_id_mapping_sql
:
String
=
...
...
@@ -272,7 +281,7 @@ object Constant {
def
process
(
idfa
:
String
,
idfv
:
String
,
pkg_name
:
String
,
imei
:
String
,
androidId
:
String
,
oaid
:
String
,
gaid
:
String
,
sysId
:
String
,
bkupId
:
String
,
country
:
String
,
ip
:
String
,
ua
:
String
,
brand
:
String
,
model
:
String
,
os_version
:
String
,
osv_upt
:
String
,
upt
:
String
,
network_type
:
String
,
platform
:
String
)
:
(
String
,
Row
)
=
{
upt
:
String
,
network_type
:
String
,
platform
:
String
,
cnt
:
Long
)
:
(
String
,
Row
)
=
{
val
f_idfa
=
if
(
StringUtils
.
isNotBlank
(
idfa
)
&&
idfa
.
matches
(
didPtn
)
&&
!
idfa
.
matches
(
allZero
))
{
idfa
}
else
{
...
...
@@ -375,16 +384,16 @@ object Constant {
"other"
}
val
rw
=
if
(
"ios"
.
equals
(
f_platform
))
{
Row
(
f_idfa
,
f_idfv
,
pkg_name
,
f_sysId
,
f_bkupId
,
""
,
""
,
f_country
,
f_ip
,
f_ua
,
f_brand
,
f_model
,
f_osv
,
f_osv_upt
,
f_upt
,
""
,
f_network_type
)
Row
(
f_idfa
,
f_idfv
,
pkg_name
,
f_sysId
,
f_bkupId
,
""
,
""
,
f_country
,
f_ip
,
f_ua
,
f_brand
,
f_model
,
f_osv
,
f_osv_upt
,
f_upt
,
""
,
f_network_type
,
cnt
)
}
else
if
(
"android"
.
equals
(
f_platform
))
{
Row
(
f_imei
,
f_androidId
,
pkg_name
,
f_oaid
,
f_gaid
,
f_sysId
,
f_bkupId
,
""
,
""
,
f_country
,
f_ip
,
f_ua
,
f_brand
,
f_model
,
f_osv
,
f_osv_upt
,
f_upt
,
""
,
f_network_type
)
Row
(
f_imei
,
f_androidId
,
pkg_name
,
f_oaid
,
f_gaid
,
f_sysId
,
f_bkupId
,
""
,
""
,
f_country
,
f_ip
,
f_ua
,
f_brand
,
f_model
,
f_osv
,
f_osv_upt
,
f_upt
,
""
,
f_network_type
,
cnt
)
}
else
{
Row
(
f_idfa
,
f_idfv
,
pkg_name
,
f_imei
,
f_androidId
,
f_oaid
,
f_gaid
,
f_sysId
,
f_bkupId
,
""
,
""
,
f_country
,
f_ip
,
f_ua
,
f_brand
,
f_model
,
f_osv
,
f_osv_upt
,
f_upt
,
""
,
f_network_type
)
Row
(
f_idfa
,
f_idfv
,
pkg_name
,
f_imei
,
f_androidId
,
f_oaid
,
f_gaid
,
f_sysId
,
f_bkupId
,
""
,
""
,
f_country
,
f_ip
,
f_ua
,
f_brand
,
f_model
,
f_osv
,
f_osv_upt
,
f_upt
,
""
,
f_network_type
,
cnt
)
}
(
f_platform
,
rw
)
}
case
class
Result
(
device_id
:
String
,
device_type
:
String
,
one_id
:
String
)
extends
Serializable
case
class
OneIDScore
(
one_id
:
String
,
one_type
:
String
,
one_score
:
Double
)
extends
Serializable
case
class
OneIDScore
(
one_id
:
String
,
one_type
:
String
,
one_score
:
Double
,
one_version
:
String
)
extends
Serializable
}
\ No newline at end of file
src/main/scala/mobvista/dmp/datasource/id_mapping/DspReq.scala
View file @
12a03fcf
...
...
@@ -58,6 +58,7 @@ class DspReq extends EtlDeviceIdDaily {
val
os_version
=
row
.
getAs
[
String
](
"os_version"
)
val
osv_upt
=
row
.
getAs
[
String
](
"osv_upt"
)
val
upt
=
row
.
getAs
[
String
](
"upt"
)
val
cnt
=
row
.
getAs
[
Long
](
"cnt"
)
var
idfv
=
""
var
oaid
=
""
var
imei
=
""
...
...
@@ -90,7 +91,7 @@ class DspReq extends EtlDeviceIdDaily {
}
}
process
(
idfa
,
idfv
,
pkg_name
,
imei
,
androidId
,
oaid
,
gaid
,
sysId
=
""
,
bkupId
=
""
,
country
,
ip
,
ua
,
brand
,
model
,
os_version
,
osv_upt
,
upt
,
network_type
,
platform
)
model
,
os_version
,
osv_upt
,
upt
,
network_type
,
platform
,
cnt
)
})
rdd
}
...
...
src/main/scala/mobvista/dmp/datasource/id_mapping/EtlDeviceIdDaily.scala
View file @
12a03fcf
...
...
@@ -42,6 +42,7 @@ abstract class EtlDeviceIdDaily extends CommonSparkJob with Serializable {
if
(
"dsp_req"
.
equalsIgnoreCase
(
business
))
{
for
(
i
<-
0
until
6
)
{
val
df
=
processData
(
date
,
i
,
spark
)
.
repartition
(
coalesce
)
df
.
persist
(
StorageLevel
.
MEMORY_AND_DISK_SER
)
val
iosTab
=
df
.
filter
(
plf
=>
{
...
...
@@ -52,7 +53,6 @@ abstract class EtlDeviceIdDaily extends CommonSparkJob with Serializable {
FileSystem
.
get
(
new
URI
(
s
"s3://mob-emr-test"
),
spark
.
sparkContext
.
hadoopConfiguration
).
delete
(
new
Path
(
output
+
s
"/ios/${i}"
),
true
)
spark
.
createDataFrame
(
iosTab
,
iosSchema
)
.
repartition
(
coalesce
)
.
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
+
s
"/ios/${i}"
)
...
...
@@ -66,7 +66,6 @@ abstract class EtlDeviceIdDaily extends CommonSparkJob with Serializable {
FileSystem
.
get
(
new
URI
(
s
"s3://mob-emr-test"
),
spark
.
sparkContext
.
hadoopConfiguration
).
delete
(
new
Path
(
output
+
s
"/android/${i}"
),
true
)
spark
.
createDataFrame
(
adrTab
,
adrSchema
)
.
repartition
(
coalesce
)
.
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
+
s
"/android/${i}"
)
...
...
@@ -80,7 +79,6 @@ abstract class EtlDeviceIdDaily extends CommonSparkJob with Serializable {
FileSystem
.
get
(
new
URI
(
s
"s3://mob-emr-test"
),
spark
.
sparkContext
.
hadoopConfiguration
).
delete
(
new
Path
(
output
+
s
"/other/${i}"
),
true
)
spark
.
createDataFrame
(
otherTab
,
otherSchema
)
.
repartition
(
coalesce
)
.
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
+
s
"/other/${i}"
)
...
...
@@ -89,6 +87,7 @@ abstract class EtlDeviceIdDaily extends CommonSparkJob with Serializable {
}
}
else
{
val
df
=
processData
(
date
,
0
,
spark
)
.
repartition
(
coalesce
)
df
.
persist
(
StorageLevel
.
MEMORY_AND_DISK_SER
)
val
iosTab
=
df
.
filter
(
plf
=>
{
...
...
@@ -99,7 +98,6 @@ abstract class EtlDeviceIdDaily extends CommonSparkJob with Serializable {
FileSystem
.
get
(
new
URI
(
s
"s3://mob-emr-test"
),
spark
.
sparkContext
.
hadoopConfiguration
).
delete
(
new
Path
(
output
+
"/ios"
),
true
)
spark
.
createDataFrame
(
iosTab
,
iosSchema
)
.
coalesce
(
coalesce
)
.
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
+
"/ios"
)
...
...
@@ -113,7 +111,6 @@ abstract class EtlDeviceIdDaily extends CommonSparkJob with Serializable {
FileSystem
.
get
(
new
URI
(
s
"s3://mob-emr-test"
),
spark
.
sparkContext
.
hadoopConfiguration
).
delete
(
new
Path
(
output
+
s
"/android"
),
true
)
spark
.
createDataFrame
(
adrTab
,
adrSchema
)
.
coalesce
(
coalesce
)
.
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
+
s
"/android"
)
...
...
@@ -127,11 +124,21 @@ abstract class EtlDeviceIdDaily extends CommonSparkJob with Serializable {
FileSystem
.
get
(
new
URI
(
s
"s3://mob-emr-test"
),
spark
.
sparkContext
.
hadoopConfiguration
).
delete
(
new
Path
(
output
+
s
"/other"
),
true
)
spark
.
createDataFrame
(
otherTab
,
otherSchema
)
.
coalesce
(
coalesce
)
.
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
+
s
"/other"
)
}
spark
.
sql
(
s
"""
|ALTER TABLE dws.dws_device_id_ios_frequency ADD IF NOT EXISTS PARTITION (dt='$date',source='${business}')
| LOCATION '$output/ios'
|"""
.
stripMargin
)
spark
.
sql
(
s
"""
|ALTER TABLE dws.dws_device_id_android_frequency ADD IF NOT EXISTS PARTITION (dt='$date',source='${business}')
| LOCATION '$output/android'
|"""
.
stripMargin
)
}
finally
{
if
(
spark
!=
null
)
{
spark
.
stop
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment