Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mobvista-dmp
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
王金锋
mobvista-dmp
Commits
a53c5f7c
Commit
a53c5f7c
authored
Dec 20, 2021
by
WangJinfeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
init id_mapping
parent
1dac1a5f
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
23 deletions
+10
-23
IDMappingGraphx.scala
.../mobvista/dmp/datasource/id_mapping/IDMappingGraphx.scala
+10
-23
No files found.
src/main/scala/mobvista/dmp/datasource/id_mapping/IDMappingGraphx.scala
View file @
a53c5f7c
...
...
@@ -3,6 +3,7 @@ package mobvista.dmp.datasource.id_mapping
import
com.alibaba.fastjson.JSONObject
import
mobvista.dmp.common.
{
CommonSparkJob
,
MobvistaConstant
}
import
mobvista.dmp.datasource.id_mapping.Constant._
import
mobvista.dmp.util.MD5Util
import
org.apache.commons.cli.
{
BasicParser
,
Options
}
import
org.apache.commons.lang3.StringUtils
import
org.apache.hadoop.fs.
{
FileSystem
,
Path
}
...
...
@@ -85,33 +86,18 @@ class IDMappingGraphx extends CommonSparkJob with Serializable {
case
_
=>
""
}
val
todayDF
=
spark
.
createDataFrame
(
spark
.
sql
(
dailySQL
).
rdd
.
map
(
row
=>
{
val
todayDF
=
spark
.
createDataFrame
(
spark
.
sql
(
dailySQL
).
sample
(
0.001
).
rdd
.
map
(
row
=>
{
processData
(
row
,
platform
)
}),
schema
=
schame
)
val
vertex
=
todayDF
.
rdd
.
map
(
row
=>
{
val
res
=
processVertex
(
date
,
row
,
idSet
.
toArray
,
idMainSet
)
/*
val res = platform match {
case "ios" =>
processVertex(date, rows, Constant.iosIDSet.toArray, Constant.iosMainIDSet)
case _ => {
country.toUpperCase match {
case "CN" =>
processVertex(date, rows, Constant.androidCNIDSet.toArray, Constant.androidMainIDSet)
case _ =>
processVertex(date, rows, Constant.androidIDSet.toArray, Constant.androidMainIDSet)
}
}
}
*/
res
processVertex
(
date
,
row
,
idSet
.
toArray
,
idMainSet
)
}).
flatMap
(
l
=>
l
)
val
maxGraph
=
vertex
.
combineByKey
(
(
v
:
(
String
,
JSONObject
))
=>
Iterable
(
v
),
(
c
:
Iterable
[(
String
,
JSONObject
)],
v
:
(
String
,
JSONObject
))
=>
c
++
Seq
(
v
),
(
c1
:
Iterable
[(
String
,
JSONObject
)],
c2
:
Iterable
[(
String
,
JSONObject
)])
=>
c1
++
c2
(
v
:
(
String
,
JSONObject
))
=>
Set
(
v
),
(
c
:
Set
[(
String
,
JSONObject
)],
v
:
(
String
,
JSONObject
))
=>
c
++
Seq
(
v
),
(
c1
:
Set
[(
String
,
JSONObject
)],
c2
:
Set
[(
String
,
JSONObject
)])
=>
c1
++
c2
)
FileSystem
.
get
(
new
URI
(
s
"s3://mob-emr-test"
),
spark
.
sparkContext
.
hadoopConfiguration
).
delete
(
new
Path
(
outPutPath
),
true
)
...
...
@@ -202,7 +188,8 @@ class IDMappingGraphx extends CommonSparkJob with Serializable {
""
}
// IosVert(idfa, sysid, idfv_bundle, bmosv_osv_upt, bmosv_upt, bmosv_ipua_bundle, xwho, user_id, bkupid, cnt)
Row
(
idfa
,
sysid
,
idfv_bundle
,
bmosv_osv_upt
,
bmosv_upt
,
bmosv_ipua_bundle
,
xwho
,
user_id
,
bkupid
,
cnt
)
Row
(
idfa
,
sysid
,
MD5Util
.
getMD5Str
(
idfv_bundle
),
MD5Util
.
getMD5Str
(
bmosv_osv_upt
),
MD5Util
.
getMD5Str
(
bmosv_upt
),
MD5Util
.
getMD5Str
(
bmosv_ipua_bundle
),
xwho
,
user_id
,
bkupid
,
cnt
)
case
"android"
=>
val
imei
=
row
.
getAs
[
String
](
"imei"
)
val
android_id
=
row
.
getAs
[
String
](
"android_id"
)
...
...
@@ -238,9 +225,9 @@ class IDMappingGraphx extends CommonSparkJob with Serializable {
}
// AdrVert(imei, gaid, oaid, sysid, android_pkg, bmosv_upt, bmosv_ipua_pkg, xwho, user_id, bkupid, cnt)
if
(
"CN"
.
equalsIgnoreCase
(
country
))
{
Row
(
imei
,
oaid
,
gaid
,
sysid
,
android_pkg
,
bmosv_upt
,
bmosv_ipua_pkg
,
xwho
,
user_id
,
bkupid
,
cnt
)
Row
(
imei
,
oaid
,
gaid
,
sysid
,
MD5Util
.
getMD5Str
(
android_pkg
),
MD5Util
.
getMD5Str
(
bmosv_upt
),
MD5Util
.
getMD5Str
(
bmosv_ipua_pkg
)
,
xwho
,
user_id
,
bkupid
,
cnt
)
}
else
{
Row
(
gaid
,
imei
,
oaid
,
sysid
,
android_pkg
,
bmosv_upt
,
bmosv_ipua_pkg
,
xwho
,
user_id
,
bkupid
,
cnt
)
Row
(
gaid
,
imei
,
oaid
,
sysid
,
MD5Util
.
getMD5Str
(
android_pkg
),
MD5Util
.
getMD5Str
(
bmosv_upt
),
MD5Util
.
getMD5Str
(
bmosv_ipua_pkg
)
,
xwho
,
user_id
,
bkupid
,
cnt
)
}
case
_
=>
Row
(
""
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment