Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mobvista-dmp
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
王金锋
mobvista-dmp
Commits
7890533e
Commit
7890533e
authored
Jul 26, 2021
by
WangJinfeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix rtdmp
parent
986ad516
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
33 additions
and
47 deletions
+33
-47
rtdmp.sh
azkaban/rtdmp/rtdmp.sh
+17
-29
rtdmp_fetch.sh
azkaban/rtdmp/rtdmp_fetch.sh
+1
-1
rtdmp_merge.job
azkaban/rtdmp/rtdmp_merge.job
+3
-2
rtdmp_merge_ck.sh
azkaban/rtdmp/rtdmp_merge_ck.sh
+12
-15
No files found.
azkaban/rtdmp/rtdmp.sh
View file @
7890533e
...
@@ -4,48 +4,35 @@ source ../dmp_env.sh
...
@@ -4,48 +4,35 @@ source ../dmp_env.sh
today
=
${
ScheduleTime
}
today
=
${
ScheduleTime
}
date_time
=
$(
date
+
"%Y
-%m-%d.%H"
-d
"-1
hour
$today
"
)
date_time
=
$(
date
+
"%Y
%m%d%H"
-d
"-2
hour
$today
"
)
date_path
=
$(
date
+%Y/%m/%d/%H
-d
"-1 hour
$today
"
)
date_path
=
$(
date
+%Y/%m/%d/%H
-d
"-1 hour
$today
"
)
part_num
=
$(
hadoop fs
-ls
s3://mob-emr-test/dataplatform/rtdmp_pre/
${
date_path
}
/ |
wc
-l
)
if
[[
${
part_num
}
-le
50
]]
;
then
echo
"This Dir No Data !!!"
partition
=
10
coalesce
=
10
executor
=
2
memory
=
4
core
=
2
flag
=
0
else
partition
=
2000
coalesce
=
200
executor
=
8
memory
=
10
core
=
4
flag
=
1
fi
INPUT
=
"s3://mob-emr-test/dataplatform/rtdmp_pre/
${
date_path
}
"
INPUT
=
"s3://mob-emr-test/dataplatform/rtdmp_pre/
${
date_path
}
"
OUTPUT
=
"s3://mob-emr-test/dataplatform/rtdmp_deal/
${
date_path
}
"
OUTPUT
=
"s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/audience_merge/
${
date_path
}
"
before_date_path
=
$(
date
+%Y/%m/%d/%H
-d
"-2 hour
$today
"
)
BEFORE_OUTPUT
=
"s3://mob-emr-test/dataplatform/rtdmp/
${
before_date_path
}
"
check_await
"
${
BEFORE_OUTPUT
}
/_SUCCESS"
spark-submit
--class
mobvista.dmp.datasource.rtdmp.RTDmpMain
\
spark-submit
--class
mobvista.dmp.datasource.rtdmp.RTDmpMain
\
--name
"RTDmpMain.
${
date_time
}
"
\
--name
"RTDmpMain.
${
date_time
}
"
\
--conf
spark.sql.shuffle.partitions
=
${
partition
}
\
--conf
spark.sql.shuffle.partitions
=
1000
\
--conf
spark.default.parallelism
=
${
partition
}
\
--conf
spark.default.parallelism
=
1000
\
--conf
spark.kryoserializer.buffer.max
=
512m
\
--conf
spark.kryoserializer.buffer.max
=
512m
\
--conf
spark.kryoserializer.buffer
=
64m
\
--conf
spark.kryoserializer.buffer
=
64m
\
--master
yarn
--deploy-mode
cluster
\
--master
yarn
--deploy-mode
cluster
\
--executor-memory
${
memory
}
g
--driver-memory
6g
--executor-cores
${
core
}
--num-executors
${
executor
}
\
--executor-memory
18g
--driver-memory
4g
--executor-cores
5
--num-executors
40
\
.././DMP.jar
\
.././DMP.jar
\
-
flag
${
flag
}
-time
${
date_time
}
-input
${
INPUT
}
-output
${
OUTPUT
}
-coalesce
${
coalesce
}
-
datetime
${
date_time
}
-input
${
INPUT
}
-output
${
OUTPUT
}
-coalesce
200
if
[[
$?
-ne
0
]]
;
then
if
[[
$?
-ne
0
]]
;
then
exit
255
exit
255
fi
fi
mount_partition
"audience_merge"
"dt='
${
curr_time
}
'"
"
$OUTPUT
"
expire_time
=
$(
date
+
"%Y%m%d%H"
-d
"-24 hour
$today
"
)
expire_date_path
=
$(
date
+%Y/%m/%d/%H
-d
"-24 hour
$today
"
)
EXPIRE_OUTPUT_PATH
=
"s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/audience_merge/
${
expire_date_path
}
"
unmount_partition
"audience_merge"
"dt='
${
expire_time
}
'"
"
${
EXPIRE_OUTPUT_PATH
}
"
\ No newline at end of file
azkaban/rtdmp/rtdmp_fetch.sh
View file @
7890533e
...
@@ -4,7 +4,7 @@ source ../dmp_env.sh
...
@@ -4,7 +4,7 @@ source ../dmp_env.sh
today
=
${
ScheduleTime
:-
$1
}
today
=
${
ScheduleTime
:-
$1
}
start_time
=
$(
date
+
"%Y-%m-%d %H:00:00"
-d
"-
24
hours
$today
"
)
start_time
=
$(
date
+
"%Y-%m-%d %H:00:00"
-d
"-
168
hours
$today
"
)
end_time
=
$(
date
+
"%Y-%m-%d %H:59:59"
-d
"-1 hours
$today
"
)
end_time
=
$(
date
+
"%Y-%m-%d %H:59:59"
-d
"-1 hours
$today
"
)
java
-cp
../
${
JAR
}
mobvista.dmp.datasource.rtdmp.ServerMain
"
${
start_time
}
"
"
${
end_time
}
"
java
-cp
../
${
JAR
}
mobvista.dmp.datasource.rtdmp.ServerMain
"
${
start_time
}
"
"
${
end_time
}
"
...
...
azkaban/rtdmp/rtdmp_merge.job
View file @
7890533e
type=command
type=command
command=bash -x rtdmp_merge.sh
dependencies=rtdmp_pre
\ No newline at end of file
command=bash -x rtdmp.sh
\ No newline at end of file
azkaban/rtdmp/rtdmp_merge_ck.sh
View file @
7890533e
...
@@ -12,19 +12,17 @@ database="dwh"
...
@@ -12,19 +12,17 @@ database="dwh"
table
=
"audience_merge"
table
=
"audience_merge"
spark-submit
--class
mobvista.dmp.datasource.rtdmp.RTDmpMergeCK
\
spark-submit
--class
mobvista.dmp.datasource.rtdmp.RTDmpMergeCK
\
--name
"RTDmpMergeCK.wangjf.
${
date_time
}
"
\
--name
"RTDmpMergeCK.wangjf.
${
date_time
}
"
\
--conf
spark.sql.shuffle.partitions
=
1000
\
--conf
spark.sql.shuffle.partitions
=
1000
\
--conf
spark.default.parallelism
=
1000
\
--conf
spark.default.parallelism
=
1000
\
--conf
spark.kryoserializer.buffer.max
=
256m
\
--conf
spark.kryoserializer.buffer.max
=
256m
\
--conf
spark.speculation
=
false
\
--conf
spark.speculation
=
false
\
--conf
spark.speculation.quantile
=
0.9
\
--conf
spark.speculation.quantile
=
0.9
\
--conf
spark.speculation.multiplier
=
1.3
\
--conf
spark.speculation.multiplier
=
1.3
\
--conf
spark.executor.extraJavaOptions
=
"-XX:+UseG1GC"
\
--conf
spark.executor.extraJavaOptions
=
"-XX:+UseG1GC"
\
--files
${
HIVE_SITE_PATH
}
\
--master
yarn
--deploy-mode
cluster
--executor-memory
10g
--driver-memory
6g
--executor-cores
6
--num-executors
10
\
--jars
${
SPARK_HOME
}
/auxlib/Common-SerDe-1.0-SNAPSHOT.jar
\
../
${
JAR
}
-date_time
"
${
date_time
}
"
-host
${
host
}
-cluster
${
cluster
}
-database
${
database
}
-table
${
table
}
--master
yarn
--deploy-mode
cluster
--executor-memory
10g
--driver-memory
6g
--executor-cores
5
--num-executors
6
\
../
${
JAR
}
-date_time
"
${
date_time
}
"
-host
${
host
}
-cluster
${
cluster
}
-database
${
database
}
-table
${
table
}
if
[[
$?
-ne
0
]]
;
then
if
[[
$?
-ne
0
]]
;
then
exit
255
exit
255
fi
fi
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment