Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mobvista-dmp
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
王金锋
mobvista-dmp
Commits
d2703fa6
Commit
d2703fa6
authored
Jul 23, 2021
by
WangJinfeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
rtdmp_merge remove
parent
5675c153
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
23 additions
and
11 deletions
+23
-11
RTDmpMergeCK.scala
...in/scala/mobvista/dmp/datasource/rtdmp/RTDmpMergeCK.scala
+23
-11
No files found.
src/main/scala/mobvista/dmp/datasource/rtdmp/RTDmpMergeCK.scala
View file @
d2703fa6
...
...
@@ -8,6 +8,8 @@ import mobvista.dmp.utils.clickhouse.ClickHouseSparkExt._
import
org.apache.commons.cli.
{
BasicParser
,
Options
}
import
ru.yandex.clickhouse.ClickHouseDataSource
import
java.text.SimpleDateFormat
import
java.util.Calendar
import
scala.collection.JavaConversions._
import
scala.collection.mutable
...
...
@@ -48,27 +50,37 @@ class RTDmpMergeCK extends CommonSparkJob with Serializable {
try
{
expire_time
=
DateUtil
.
getDayByString
(
date_time
,
"yyyyMMddHH"
,
-
1
)
spark
.
udf
.
register
(
"process"
,
process
_
)
val
df
=
spark
.
sql
(
sql
.
replace
(
"@dt"
,
date_time
))
.
filter
(
"size(audience_id) > 0"
)
val
tdf
=
spark
.
emptyDataFrame
val
sdf
=
new
SimpleDateFormat
(
"yyyyMMddHH"
)
// drop expire partition
val
calendar
=
Calendar
.
getInstance
()
var
date
=
sdf
.
parse
(
date_time
)
calendar
.
setTime
(
date
)
calendar
.
set
(
Calendar
.
HOUR_OF_DAY
,
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
)
-
6
)
val
expire_part
=
sdf
.
format
(
calendar
.
getTime
)
var
dt_part
=
expire_part
.
substring
(
0
,
8
)
var
hour_part
=
expire_part
.
substring
(
8
,
10
)
implicit
val
clickhouseDataSource
:
ClickHouseDataSource
=
ClickHouseConnectionFactory
.
get
(
host
)
val
clusterName
=
Some
(
cluster
)
:
Option
[
String
]
val
date
=
date_time
.
substring
(
0
,
8
)
tdf
.
dropPartition
(
database
,
table
,
s
"($dt_part,'$hour_part')"
,
clusterName
)
val
dt
=
MobvistaConstant
.
sdf1
.
format
(
MobvistaConstant
.
sdf2
.
parse
(
date
))
val
hour
=
date_time
.
substring
(
8
,
10
)
spark
.
udf
.
register
(
"process"
,
process
_
)
val
tdf
=
spark
.
emptyDataFrame
// drop curr partition
tdf
.
dropPartition
(
database
,
table
,
s
"($date,'$hour')"
,
clusterName
)
val
df
=
spark
.
sql
(
sql
.
replace
(
"@dt"
,
date_time
))
.
filter
(
"size(audience_id) > 0"
)
dt_part
=
date_time
.
substring
(
0
,
8
)
hour_part
=
expire_time
.
substring
(
8
,
10
)
val
dt
=
MobvistaConstant
.
sdf1
.
format
(
MobvistaConstant
.
sdf2
.
parse
(
dt_part
))
tdf
.
dropPartition
(
database
,
table
,
s
"($dt_part,'$hour_part')"
,
clusterName
)
Thread
.
sleep
(
120000
)
df
.
saveToClickHouse
(
database
,
table
,
Seq
(
dt
,
hour
),
Seq
(
"dt"
,
"hour"
),
clusterName
,
batchSize
=
200000
)
df
.
saveToClickHouse
(
database
,
table
,
Seq
(
dt
,
hour
_part
),
Seq
(
"dt"
,
"hour"
),
clusterName
,
batchSize
=
200000
)
}
finally
{
if
(
spark
!=
null
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment