Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mobvista-dmp
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
王金锋
mobvista-dmp
Commits
ff5d6e34
Commit
ff5d6e34
authored
Dec 20, 2021
by
WangJinfeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
init id_mapping
parent
a66a6656
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
17 additions
and
23 deletions
+17
-23
IDMappingGraphx.scala
.../mobvista/dmp/datasource/id_mapping/IDMappingGraphx.scala
+17
-23
No files found.
src/main/scala/mobvista/dmp/datasource/id_mapping/IDMappingGraphx.scala
View file @
ff5d6e34
...
...
@@ -95,19 +95,11 @@ class IDMappingGraphx extends CommonSparkJob with Serializable {
}).
flatMap
(
l
=>
l
)
val
maxGraph
=
vertex
.
combineByKey
(
(
v
:
(
String
,
JSONObject
))
=>
Set
(
v
),
(
c
:
Set
[(
String
,
JSONObject
)],
v
:
(
String
,
JSONObject
))
=>
c
++
Seq
(
v
),
(
c1
:
Set
[(
String
,
JSONObject
)],
c2
:
Set
[(
String
,
JSONObject
)])
=>
c1
++
c2
(
v
:
(
String
,
JSONObject
,
String
))
=>
Set
(
v
),
(
c
:
Set
[(
String
,
JSONObject
,
String
)],
v
:
(
String
,
JSONObject
,
String
))
=>
c
++
Seq
(
v
),
(
c1
:
Set
[(
String
,
JSONObject
,
String
)],
c2
:
Set
[(
String
,
JSONObject
,
String
)])
=>
c1
++
c2
)
/*
FileSystem.get(new URI(s"s3://mob-emr-test"), spark.sparkContext.hadoopConfiguration).delete(new Path(outPutPath), true)
maxGraph
.repartition(coalesce)
.saveAsTextFile(outPutPath, classOf[GzipCodec])
*/
val
multiOneIDRDD
=
maxGraph
.
filter
(
kv
=>
{
kv
.
_2
.
size
>
1
}).
map
(
rs
=>
{
...
...
@@ -124,10 +116,12 @@ class IDMappingGraphx extends CommonSparkJob with Serializable {
}).
map
(
kv
=>
{
val
oneID
=
new
JSONObject
()
val
srcID
=
kv
.
_1
var
idType
=
""
kv
.
_2
.
foreach
(
it
=>
{
idType
=
it
.
_3
oneID
.
put
(
it
.
_1
,
it
.
_2
)
})
(
srcID
,
oneID
)
(
srcID
,
oneID
,
idType
)
})
FileSystem
.
get
(
new
URI
(
s
"s3://mob-emr-test"
),
spark
.
sparkContext
.
hadoopConfiguration
).
delete
(
new
Path
(
outPutPath
),
true
)
...
...
@@ -233,8 +227,8 @@ class IDMappingGraphx extends CommonSparkJob with Serializable {
}
}
def
processVertex
(
date
:
String
,
row
:
Row
,
ids
:
Array
[
String
],
mainIDSet
:
Set
[
String
])
:
ArrayBuffer
[(
String
,
(
String
,
JSONObject
))]
=
{
val
array
=
new
ArrayBuffer
[(
String
,
(
String
,
JSONObject
))]()
def
processVertex
(
date
:
String
,
row
:
Row
,
ids
:
Array
[
String
],
mainIDSet
:
Set
[
String
])
:
ArrayBuffer
[(
String
,
(
String
,
JSONObject
,
String
))]
=
{
val
array
=
new
ArrayBuffer
[(
String
,
(
String
,
JSONObject
,
String
))]()
implicit
val
formats
=
org
.
json4s
.
DefaultFormats
// val json = JSON.parseObject(Serialization.write(row))
// 事件频次
...
...
@@ -249,14 +243,15 @@ class IDMappingGraphx extends CommonSparkJob with Serializable {
jsonObject
.
put
(
"active_type"
,
date
)
jsonObject
.
put
(
"cnt"
,
cnt
)
val
oneID
=
row
.
getAs
[
String
](
String
.
valueOf
(
ids
(
i
)))
array
+=
((
oneID
,
(
oneID
,
jsonObject
)))
array
+=
((
oneID
,
(
oneID
,
jsonObject
,
oneIDType
)))
for
(
j
<-
i
+
1
until
ids
.
length
)
{
if
(
StringUtils
.
isNotBlank
(
row
.
getAs
[
String
](
String
.
valueOf
(
ids
(
j
)))))
{
val
srcOrg
=
row
.
getAs
[
String
](
String
.
valueOf
(
ids
(
j
)))
val
srcType
=
ids
(
j
)
val
srcOrg
=
row
.
getAs
[
String
](
String
.
valueOf
(
srcType
))
if
(
mainIDSet
.
contains
(
oneIDType
))
{
array
+=
((
srcOrg
,
(
oneID
,
jsonObject
)))
array
+=
((
srcOrg
,
(
oneID
,
jsonObject
,
srcType
)))
}
else
{
array
+=
((
oneID
,
(
srcOrg
,
jsonObject
)))
array
+=
((
oneID
,
(
srcOrg
,
jsonObject
,
srcType
)))
}
}
}
...
...
@@ -266,19 +261,18 @@ class IDMappingGraphx extends CommonSparkJob with Serializable {
array
}
def
updateOneID
(
kv
:
(
String
,
Iterable
[(
String
,
JSONObject
)]),
mainIDSet
:
Set
[
String
])
:
ArrayBuffer
[(
String
,
JSONObject
)]
=
{
val
array
=
new
ArrayBuffer
[(
String
,
JSONObject
)]()
def
updateOneID
(
kv
:
(
String
,
Iterable
[(
String
,
JSONObject
,
String
)]),
mainIDSet
:
Set
[
String
])
:
ArrayBuffer
[(
String
,
JSONObject
,
String
)]
=
{
val
array
=
new
ArrayBuffer
[(
String
,
JSONObject
,
String
)]()
val
iters
=
kv
.
_2
// val oneID = new ArrayBuffer[(String, String)]()
val
oneID
=
new
JSONObject
()
iters
.
foreach
(
kv
=>
{
oneID
.
put
(
kv
.
_1
,
kv
.
_2
)
})
iters
.
filter
(
tp
=>
{
mainIDSet
.
contains
(
tp
.
_2
.
getString
(
"id_type"
))
!
mainIDSet
.
contains
(
tp
.
_2
.
getString
(
"id_type"
))
}).
foreach
(
itr
=>
{
val
k
=
itr
.
_1
array
+=
((
k
,
oneID
))
array
+=
((
k
,
oneID
,
itr
.
_3
))
})
array
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment