Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mobvista-dmp
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
王金锋
mobvista-dmp
Commits
fcb21923
Commit
fcb21923
authored
Aug 05, 2021
by
WangJinfeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update rtdmp_merge_ck.sh user_info_ck.sh
parent
118555b8
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
7 additions
and
8 deletions
+7
-8
user_info_ck.sh
azkaban/datatory/user_info/user_info_ck.sh
+3
-3
rtdmp_merge_ck.sh
azkaban/rtdmp/rtdmp_merge_ck.sh
+1
-1
UserInfo.scala
...main/scala/mobvista/dmp/clickhouse/feature/UserInfo.scala
+1
-2
RTDmpMain.scala
src/main/scala/mobvista/dmp/datasource/rtdmp/RTDmpMain.scala
+1
-1
RTDmpMergeCK.scala
...in/scala/mobvista/dmp/datasource/rtdmp/RTDmpMergeCK.scala
+1
-1
No files found.
azkaban/datatory/user_info/user_info_ck.sh
View file @
fcb21923
...
...
@@ -26,14 +26,14 @@ table="ods_user_info"
# --conf spark.executor.extraJavaOptions="-XX:+UseG1GC" \
spark-submit
--class
mobvista.dmp.clickhouse.feature.UserInfo
\
--name
"UserInfo_wangjf_
${
date
}
"
\
--conf
spark.sql.shuffle.partitions
=
8
000
\
--conf
spark.default.parallelism
=
8
000
\
--conf
spark.sql.shuffle.partitions
=
10
000
\
--conf
spark.default.parallelism
=
10
000
\
--conf
spark.sql.files.maxPartitionBytes
=
134217728
\
--conf
spark.kryoserializer.buffer.max
=
512m
\
--conf
spark.kryoserializer.buffer
=
64m
\
--conf
spark.executor.extraJavaOptions
=
"-XX:+UseG1GC"
\
--files
${
HIVE_SITE_PATH
}
\
--master
yarn
--deploy-mode
cluster
--executor-memory
8g
--driver-memory
4g
--executor-cores
5
--num-executors
12
\
--master
yarn
--deploy-mode
cluster
--executor-memory
8g
--driver-memory
4g
--executor-cores
4
--num-executors
10
\
../../
${
JAR
}
-date
${
date
}
-host
${
host
}
-cluster
${
cluster
}
-database
${
database
}
-table
${
table
}
if
[[
$?
-ne
0
]]
;
then
...
...
azkaban/rtdmp/rtdmp_merge_ck.sh
View file @
fcb21923
...
...
@@ -20,7 +20,7 @@ spark-submit --class mobvista.dmp.datasource.rtdmp.RTDmpMergeCK \
--conf
spark.speculation.quantile
=
0.9
\
--conf
spark.speculation.multiplier
=
1.3
\
--conf
spark.executor.extraJavaOptions
=
"-XX:+UseG1GC"
\
--master
yarn
--deploy-mode
cluster
--executor-memory
10g
--driver-memory
6g
--executor-cores
6
--num-executors
10
\
--master
yarn
--deploy-mode
cluster
--executor-memory
8g
--driver-memory
4g
--executor-cores
5
--num-executors
10
\
../
${
JAR
}
-date_time
"
${
date_time
}
"
-host
${
host
}
-cluster
${
cluster
}
-database
${
database
}
-table
${
table
}
if
[[
$?
-ne
0
]]
;
then
...
...
src/main/scala/mobvista/dmp/clickhouse/feature/UserInfo.scala
View file @
fcb21923
...
...
@@ -139,13 +139,12 @@ class UserInfo extends Serializable {
r
.
getAs
(
"update_date"
),
r
.
getAs
(
"publish_date"
))
}).
toDF
Thread
.
sleep
(
120000
)
/**
* user_info save
*/
// userDF.createClickHouseDb(database, clusterName)
// userDF.createClickHouseTable(database, table, Seq("dt"), Constant.indexColumn, Constant.orderColumn, clusterName)
userDF
.
saveToClickHouse
(
database
,
table
,
Seq
(
update_date
),
Seq
(
"dt"
),
clusterName
,
batchSize
=
2
00000
)
userDF
.
saveToClickHouse
(
database
,
table
,
Seq
(
update_date
),
Seq
(
"dt"
),
clusterName
,
batchSize
=
1
00000
)
MySQLUtil
.
update
(
database
,
table
,
date
)
// val lastDate = DateUtil.getDayByString(date, "yyyyMMdd", -1)
...
...
src/main/scala/mobvista/dmp/datasource/rtdmp/RTDmpMain.scala
View file @
fcb21923
...
...
@@ -73,7 +73,7 @@ class RTDmpMain extends CommonSparkJob with Serializable {
val
calendar
=
Calendar
.
getInstance
()
val
date
=
sdf
.
parse
(
datetime
)
calendar
.
setTime
(
date
)
calendar
.
set
(
Calendar
.
HOUR_OF_DAY
,
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
)
-
24
)
calendar
.
set
(
Calendar
.
HOUR_OF_DAY
,
calendar
.
get
(
Calendar
.
HOUR_OF_DAY
)
-
48
)
val
expire_time
=
sdf
.
format
(
calendar
.
getTime
)
val
hour_rdd
=
spark
.
read
.
orc
(
input
).
rdd
.
map
(
row
=>
{
...
...
src/main/scala/mobvista/dmp/datasource/rtdmp/RTDmpMergeCK.scala
View file @
fcb21923
...
...
@@ -80,7 +80,7 @@ class RTDmpMergeCK extends CommonSparkJob with Serializable {
Thread
.
sleep
(
120000
)
df
.
saveToClickHouse
(
database
,
table
,
Seq
(
dt
,
hour_part
),
Seq
(
"dt"
,
"hour"
),
clusterName
,
batchSize
=
2
00000
)
df
.
saveToClickHouse
(
database
,
table
,
Seq
(
dt
,
hour_part
),
Seq
(
"dt"
,
"hour"
),
clusterName
,
batchSize
=
1
00000
)
MySQLUtil
.
update
(
database
,
table
,
date_time
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment