Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mobvista-dmp
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
王金锋
mobvista-dmp
Commits
fd0560d0
Commit
fd0560d0
authored
May 27, 2021
by
wang-jinfeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimize dmp
parent
a355902f
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
33 additions
and
41 deletions
+33
-41
uc_imei_lahuo_request.sh
azkaban/ali/uc_lahuo/uc_imei_lahuo_request.sh
+1
-1
uc_oaid_lahuo_request.sh
azkaban/ali/uc_lahuo/uc_oaid_lahuo_request.sh
+1
-1
uc_other_data_to_dmp.job
azkaban/ali/uc_lahuo/uc_other_data_to_dmp.job
+2
-2
uc_other_data_to_dmp_v2.sh
azkaban/ali/uc_lahuo/uc_other_data_to_dmp_v2.sh
+1
-1
youku_imei_laxin_request.sh
azkaban/ali/youku_laxin/youku_imei_laxin_request.sh
+1
-1
youku_oaid_laxin_request.sh
azkaban/ali/youku_laxin/youku_oaid_laxin_request.sh
+1
-1
iqiyi_lahuo_request.sh
azkaban/iqiyi/iqiyi_lahuo_request.sh
+1
-1
BaiChuanMainV2.java
...java/mobvista/dmp/datasource/baichuan/BaiChuanMainV2.java
+2
-2
ip.properties
src/main/resources/ip.properties
+8
-13
UCOtherDataToDmpV2.scala
...a/mobvista/dmp/datasource/taobao/UCOtherDataToDmpV2.scala
+15
-18
No files found.
azkaban/ali/uc_lahuo/uc_imei_lahuo_request.sh
View file @
fd0560d0
...
...
@@ -34,7 +34,7 @@ if [[ $? -ne 0 ]]; then
exit
255
fi
sleep
$((
fors
*
4
0
))
sleep
$((
fors
*
6
0
))
ssh
-o
"StrictHostKeyChecking no"
-i
/home/hadoop/wangjf/dataplatform_cn.pem
-l
root 182.92.177.185
"sh -x /root/workspace/check_process.sh '
${
shell
}
'"
...
...
azkaban/ali/uc_lahuo/uc_oaid_lahuo_request.sh
View file @
fd0560d0
...
...
@@ -34,7 +34,7 @@ if [[ $? -ne 0 ]]; then
exit
255
fi
sleep
$((
fors
*
4
0
))
sleep
$((
fors
*
6
0
))
ssh
-o
"StrictHostKeyChecking no"
-i
/home/hadoop/wangjf/dataplatform_cn.pem
-l
root 182.92.177.185
"sh -x /root/workspace/check_process.sh '
${
shell
}
'"
...
...
azkaban/ali/uc_lahuo/uc_other_data_to_dmp.job
View file @
fd0560d0
type=command
dependencies=uc_lahuo_data_to_dmp
command=sh -x uc_other_data_to_dmp.sh
\ No newline at end of file
command=sh -x uc_other_data_to_dmp_v2.sh
\ No newline at end of file
azkaban/ali/uc_lahuo/uc_other_data_to_dmp_v2.sh
View file @
fd0560d0
...
...
@@ -22,7 +22,7 @@ EXPIRE_OUTPUT_PATH="${UC_LAHUO_TMP_DAILY_TO_S3}/${expire_date_path}/uc_activatio
# OUTPUT_PATH03="${UC_LAHUO_TMP_DAILY_TO_S3}/${dt_slash_today}/uc_activation_other_data/4b5a58_ucoppo"
# OUTPUT_PATH04="${UC_LAHUO_TMP_DAILY_TO_S3}/${dt_slash_today}/uc_activation_other_data/d3f521_ucoppo"
OUTPUT_PATH
=
"
${
UC_LAHUO_TMP_DAILY_TO_S3
}
/
${
dt_slash_today
}
/uc_activation_other_data
/
"
OUTPUT_PATH
=
"
${
UC_LAHUO_TMP_DAILY_TO_S3
}
/
${
dt_slash_today
}
/uc_activation_other_data"
hadoop fs
-rm
-r
"
${
OUTPUT_PATH
}
"
...
...
azkaban/ali/youku_laxin/youku_imei_laxin_request.sh
View file @
fd0560d0
...
...
@@ -39,7 +39,7 @@ if [[ $? -ne 0 ]]; then
exit
255
fi
sleep
$((
fors
*
1
5
))
sleep
$((
fors
*
2
5
))
ssh
-o
"StrictHostKeyChecking no"
-i
/home/hadoop/wangjf/dataplatform_cn.pem
-l
root 182.92.177.185
"sh -x /root/workspace/check_process.sh '
${
shell
}
'"
...
...
azkaban/ali/youku_laxin/youku_oaid_laxin_request.sh
View file @
fd0560d0
...
...
@@ -39,7 +39,7 @@ if [[ $? -ne 0 ]]; then
exit
255
fi
sleep
$((
fors
*
1
5
))
sleep
$((
fors
*
2
5
))
ssh
-o
"StrictHostKeyChecking no"
-i
/home/hadoop/wangjf/dataplatform_cn.pem
-l
root 182.92.177.185
"sh -x /root/workspace/check_process.sh '
${
shell
}
'"
...
...
azkaban/iqiyi/iqiyi_lahuo_request.sh
View file @
fd0560d0
...
...
@@ -37,7 +37,7 @@ if [[ $? -ne 0 ]]; then
exit
255
fi
sleep
$((
fors
*
25
))
sleep
$((
fors
*
50
))
shell
=
" -cp /root/workspace/DMP-1.0.3-jar-with-dependencies.jar mobvista.dmp.datasource.iqiyi.IQiYiRequest"
...
...
src/main/java/mobvista/dmp/datasource/baichuan/BaiChuanMainV2.java
View file @
fd0560d0
...
...
@@ -32,8 +32,8 @@ import java.util.concurrent.TimeUnit;
public
class
BaiChuanMainV2
{
private
static
String
dt
=
DateUtil
.
format
(
new
Date
(),
"yyyy-MM-dd"
);
static
ThreadPoolExecutor
poolExecutor
=
new
ThreadPoolExecutor
(
200
,
4
00
,
500
,
TimeUnit
.
MILLISECONDS
,
new
LinkedBlockingDeque
<>(
4
00
),
new
CustomizableThreadFactory
(
"BaiChuan"
),
new
ThreadPoolExecutor
.
CallerRunsPolicy
());
static
ThreadPoolExecutor
poolExecutor
=
new
ThreadPoolExecutor
(
100
,
2
00
,
500
,
TimeUnit
.
MILLISECONDS
,
new
LinkedBlockingDeque
<>(
2
00
),
new
CustomizableThreadFactory
(
"BaiChuan"
),
new
ThreadPoolExecutor
.
CallerRunsPolicy
());
public
static
void
main
(
String
[]
args
)
throws
JoranException
,
InterruptedException
{
LoggerContext
context
=
(
LoggerContext
)
LoggerFactory
.
getILoggerFactory
();
...
...
src/main/resources/ip.properties
View file @
fd0560d0
...
...
@@ -37,21 +37,16 @@ spark_clickhouse_url=jdbc:clickhouse://192.168.17.122:8123,192.168.17.123:8123,1
mapping.se.host
=
3.34.241.249,13.209.48.228,3.35.218.108
mapping.sg.host
=
18.13
6.203.33,18.139.0.13,18.139.160.145
mapping.sg.host
=
18.13
9.0.13,18.139.160.145,54.251.75.86
mapping.aliyun.host
=
10.22.2.232,10.22.2.231,10.22.1.97,10.22.1.98,10.22.1.100,10.22.0.24,10.22.0.23,10.22.0.22,10.22.3.126,10.22.0.40,10.22.0.39,
\
10.22.1.39,10.22.3.120,10.22.3.178,10.22.1.100,10.22.1.98,10.22.1.97
mapping.aliyun.host
=
10.22.1.188,10.22.1.187,10.22.1.186
mapping.fk.host
=
3.122.120.90,35.159.16.74,3.121.69.130,18.193.88.136,18.184.171.197,3.65.211.221,3.120.203.124,3.65.116.29,54.93.86.92,3.65.111.184,
\
18.195.203.195,18.192.124.88,18.196.157.191,3.65.136.90
mapping.fk.host
=
18.159.113.192,3.126.116.164,3.126.249.98
mapping.se.host_map
=
172.31.24.255:3.34.241.249,172.31.29.23:13.209.48.228,172.31.21.185:3.35.218.108
mapping.sg.host_map
=
172.31.
17.79:18.136.203.33,172.31.23.187:18.139.0.13,172.31.18.220:18.139.160.145
mapping.sg.host_map
=
172.31.
23.187:18.139.0.13,172.31.18.220:18.139.160.145,172.31.31.118:54.251.75.86
mapping.aliyun.host_map
=
10.22.2.232:10.22.2.232,10.22.2.231:10.22.2.231,10.22.1.97:10.22.1.97,10.22.1.98:10.22.1.98,10.22.1.100:10.22.1.100,
\
10.22.0.24:10.22.0.24,10.22.0.23:10.22.0.23,10.22.0.22:10.22.0.22,10.22.3.126:10.22.3.126,10.22.0.40:10.22.0.40,10.22.0.39:10.22.0.39,
\
10.22.1.39:10.22.1.39,10.22.3.120:10.22.3.120,10.22.3.178:10.22.3.178,10.22.1.100:10.22.1.100,10.22.1.98:10.22.1.98,10.22.1.97:10.22.1.97
mapping.fk.host_map
=
172.31.21.197:3.122.120.90,172.31.30.85:35.159.16.74,172.31.31.36:3.121.69.130,172.31.23.22:18.193.88.136,
\
172.31.31.83:18.184.171.197,172.31.23.232:3.65.211.221,172.31.3.81:3.120.203.124,172.31.29.20:3.65.116.29,172.31.30.96:54.93.86.92,
\
172.31.20.225:3.65.111.184,172.31.22.97:18.195.203.195,172.31.17.2:18.192.124.88,172.31.26.198:18.196.157.191,172.31.10.218:3.65.136.90
\ No newline at end of file
mapping.aliyun.host_map
=
10.22.1.188:10.22.1.188,10.22.1.187:10.22.1.187,10.22.1.186:10.22.1.186
mapping.fk.host_map
=
172.31.26.93:18.159.113.192,172.31.18.112:3.126.116.164,172.31.30.65:3.126.249.98
\ No newline at end of file
src/main/scala/mobvista/dmp/datasource/taobao/UCOtherDataToDmpV2.scala
View file @
fd0560d0
package
mobvista.dmp.datasource.taobao
import
mobvista.dmp.common.
{
CommonSparkJob
,
MobvistaConstant
}
import
mobvista.dmp.format.RDDMultipleOutputFormat
import
mobvista.dmp.util.MRUtils
import
org.apache.commons.cli.Options
import
org.apache.hadoop.fs.
{
FileSystem
,
Path
}
import
org.apache.hadoop.io.SequenceFile.CompressionType
import
org.apache.hadoop.io.Text
import
org.apache.hadoop.io.compress.
{
CompressionCodec
,
GzipCodec
}
import
org.apache.spark.s
ql.SaveMode
import
org.apache.spark.s
torage.StorageLevel
import
java.net.URI
import
scala.collection.mutable
import
scala.collection.mutable.ArrayBuffer
class
UCOtherDataToDmpV2
extends
CommonSparkJob
with
Serializable
{
...
...
@@ -34,6 +39,7 @@ class UCOtherDataToDmpV2 extends CommonSparkJob with Serializable {
val
spark
=
MobvistaConstant
.
createSparkSession
(
"UCOtherDataToDmp"
)
mutable
.
WrappedArray
val
sc
=
spark
.
sparkContext
FileSystem
.
get
(
new
URI
(
s
"s3://mob-emr-test"
),
sc
.
hadoopConfiguration
).
delete
(
new
Path
(
output
),
true
)
...
...
@@ -64,42 +70,33 @@ class UCOtherDataToDmpV2 extends CommonSparkJob with Serializable {
| GROUP BY device_id
|"""
.
stripMargin
/*
val
df
=
spark
.
sql
(
sql
).
persist
(
StorageLevel
.
MEMORY_AND_DISK_SER
)
val
rdd
=
df
.
rdd
.
map
(
r
=>
{
val
arrayBuffer
=
new
ArrayBuffer
[(
Text
,
Text
)]()
val
deviceId
=
r
.
getAs
[
String
](
"device_id"
)
val
deviceType
=
"imeimd5"
val
platform
=
"android"
val
installList
=
r
.
getAs
[
mutable.WrappedArray
[
String
]](
"install_list"
)
if
(
installList
.
contains
(
"com.uc.foractivation.4b5a58"
)
&&
installList
.
contains
(
"com.UCMobile_bes"
))
{
(new Text(s"$output/4b5a58_ucbes"), new Text(MRUtils.JOINER.join(deviceId, deviceType, platform, "com.uc.foractivation.4b5a58_ucbes", update
)))
arrayBuffer
+=
((
new
Text
(
s
"$output/4b5a58_ucbes"
),
new
Text
(
MRUtils
.
JOINER
.
join
(
deviceId
,
deviceType
,
platform
,
"com.uc.foractivation.4b5a58_ucbes"
,
update
)
)))
}
if
(
installList
.
contains
(
"com.uc.foractivation.d3f521"
)
&&
installList
.
contains
(
"com.UCMobile_bes"
))
{
(new Text(s"$output/d3f521_ucbes"), new Text(MRUtils.JOINER.join(deviceId, deviceType, platform, "com.uc.foractivation.d3f521_ucbes", update
)))
arrayBuffer
+=
((
new
Text
(
s
"$output/d3f521_ucbes"
),
new
Text
(
MRUtils
.
JOINER
.
join
(
deviceId
,
deviceType
,
platform
,
"com.uc.foractivation.d3f521_ucbes"
,
update
)
)))
}
if
(
installList
.
contains
(
"com.uc.foractivation.4b5a58"
)
&&
installList
.
contains
(
"com.ucmobile_oppo"
))
{
(new Text(s"$output/4b5a58_ucoppo"), new Text(MRUtils.JOINER.join(deviceId, deviceType, platform, "com.uc.foractivation.4b5a58_ucoppo", update
)))
arrayBuffer
+=
((
new
Text
(
s
"$output/4b5a58_ucoppo"
),
new
Text
(
MRUtils
.
JOINER
.
join
(
deviceId
,
deviceType
,
platform
,
"com.uc.foractivation.4b5a58_ucoppo"
,
update
)
)))
}
if
(
installList
.
contains
(
"com.uc.foractivation.d3f521"
)
&&
installList
.
contains
(
"com.ucmobile_oppo"
))
{
(new Text(s"$output/d3f521_ucoppo"), new Text(MRUtils.JOINER.join(deviceId, deviceType, platform, "com.uc.foractivation.d3f521_ucoppo", update)))
} else {
(new Text(""), new Text(""))
arrayBuffer
+=
((
new
Text
(
s
"$output/d3f521_ucoppo"
),
new
Text
(
MRUtils
.
JOINER
.
join
(
deviceId
,
deviceType
,
platform
,
"com.uc.foractivation.d3f521_ucoppo"
,
update
))))
}
}).filter(t => {
StringUtils.isNotBlank(t._1.toString) && StringUtils.isNotBlank(t._2.toString)
arrayBuffer
}).
flatMap
(
l
=>
{
l
})
println(s"count -->> ${rdd.count()}")
rdd
.
coalesce
(
50
)
.
saveAsNewAPIHadoopFile
(
output
,
classOf
[
Text
],
classOf
[
Text
],
classOf
[
RDDMultipleOutputFormat
[
_
,
_
]],
conf
)
*/
val
df
=
spark
.
sql
(
sql
)
df
.
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
)
}
finally
{
spark
.
stop
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment