Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mobvista-dmp
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
王金锋
mobvista-dmp
Commits
d7eba63d
Commit
d7eba63d
authored
Jul 02, 2021
by
fan.jiang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
热云标签测试
parent
0e1c8204
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
124 additions
and
0 deletions
+124
-0
reyun_label_test.job
azkaban/ali/reyun/reyun_label_test.job
+3
-0
reyun_label_test.sh
azkaban/ali/reyun/reyun_label_test.sh
+37
-0
dmp_env.sh
azkaban/dmp_env.sh
+3
-0
ReyunLabelTest.scala
.../scala/mobvista/dmp/datasource/reyun/ReyunLabelTest.scala
+81
-0
No files found.
azkaban/ali/reyun/reyun_label_test.job
0 → 100644
View file @
d7eba63d
# Azkaban command job: runs reyun_label_test.sh through sh with command tracing (-x) enabled.
type=command
command=sh -x reyun_label_test.sh
\ No newline at end of file
azkaban/ali/reyun/reyun_label_test.sh
0 → 100644
View file @
d7eba63d
#!/usr/bin/env bash
# # # # # # # # # # # # # # # # # # # # # #
# @author : jiangfan
# @date : 2021-07-01 12:06:00
# # # # # # # # # # # # # # # # # # # # # #
#
# Waits for the daily user-info input marker, clears the output directory and
# submits the ReyunLabelTest Spark job.
#
# The script uses `source` and `[[ ]]`, so it needs bash (hence the shebang).
# check_await and the variables ODS_DMP_USER_INFO_DAILY,
# REYUN_LABEL_TEST_DAILY_PATH, HIVE_SITE_PATH and JAR are not defined here;
# they are expected to come from dmp_env.sh.
# NOTE(review): ScheduleTime is not set in this script either - presumably it
# is injected by the scheduler; confirm.

source ../../dmp_env.sh

# Both values are the day before ${ScheduleTime}, in two formats:
# yyyyMMdd for the input partition and yyyy/MM/dd for the output directory.
dt_today=$(date -d "$ScheduleTime 1 days ago" +"%Y%m%d")
dt_slash_today=$(date -d "$ScheduleTime 1 days ago" +"%Y/%m/%d")

# If date cannot parse ScheduleTime the variables end up empty. Stop here,
# otherwise OUTPUT_PATH collapses to the base directory and the recursive
# delete below would remove every previously exported day.
if [[ -z "${dt_today}" || -z "${dt_slash_today}" ]]; then
  echo "cannot derive the run date from ScheduleTime='${ScheduleTime}'" >&2
  exit 255
fi

INPUT_PATH="${ODS_DMP_USER_INFO_DAILY}/${dt_today}"

check_await "${INPUT_PATH}/_SUCCESS"

OUTPUT_PATH="${REYUN_LABEL_TEST_DAILY_PATH}/${dt_slash_today}"

# Remove the results of a previous run for the same day.
hadoop fs -rm -r "${OUTPUT_PATH}"

# Expansions are quoted so that every option receives exactly one argument.
spark-submit --class mobvista.dmp.datasource.reyun.ReyunLabelTest \
  --conf spark.network.timeout=720s \
  --conf spark.driver.maxResultSize=4g \
  --conf spark.default.parallelism=2000 \
  --conf spark.sql.shuffle.partitions=2000 \
  --conf spark.sql.broadcastTimeout=1200 \
  --conf spark.sql.autoBroadcastJoinThreshold=31457280 \
  --files "${HIVE_SITE_PATH}" \
  --master yarn --deploy-mode cluster --executor-memory 6g --driver-memory 4g --executor-cores 3 --num-executors 40 \
  "../../${JAR}" \
  -output "${OUTPUT_PATH}" -coalesce 40 -dt_today "${dt_today}"

# Report a failed Spark submission to the caller with a fixed exit status.
if [[ $? -ne 0 ]]; then
  exit 255
fi
azkaban/dmp_env.sh
View file @
d7eba63d
...
@@ -356,6 +356,9 @@ BTOP_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_btop_
...
@@ -356,6 +356,9 @@ BTOP_DAILY_PATH="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_btop_
REYUN_DAILY_PATH
=
"s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_reyun_daily"
REYUN_DAILY_PATH
=
"s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_reyun_daily"
REYUN_RAW_DATA
=
"s3://mob-emr-test/reyun/pkginfo"
REYUN_RAW_DATA
=
"s3://mob-emr-test/reyun/pkginfo"
# reyun business tmp data
REYUN_LABEL_TEST_DAILY_PATH
=
"s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/tmp/reyun_label_test_daily"
# alipay_activation business tmp data
# alipay_activation business tmp data
ALIPAY_ACTIVATION_DAILY_PATH
=
"s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_alipay_activation_daily"
ALIPAY_ACTIVATION_DAILY_PATH
=
"s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/etl_alipay_activation_daily"
...
...
src/main/scala/mobvista/dmp/datasource/reyun/ReyunLabelTest.scala
0 → 100644
View file @
d7eba63d
package
mobvista.dmp.datasource.reyun
import
mobvista.dmp.common.CommonSparkJob
import
mobvista.dmp.util.DateUtil
import
org.apache.commons.cli.Options
import
org.apache.hadoop.fs.
{
FileSystem
,
Path
}
import
org.apache.spark.sql.
{
SaveMode
,
SparkSession
}
import
java.net.URI
/**
 * Spark job that writes MD5-hashed device ids from `dwh.ods_dmp_user_info_daily`
 * to `output` as text files.
 *
 * Only rows with `platform='ios'`, `lower(country)='cn'` and an `osversion`
 * starting with 14, 15 or 16 are selected, for three `dt` values: `dt_today`
 * and the two dates `DateUtil.getDayByString` returns for the offsets -1 and -2
 * (presumably the two preceding days). The result is capped at 120,000,000 rows.
 *
 * @author jiangfan
 * @date 2021/7/1 14:59
 */
class ReyunLabelTest extends CommonSparkJob with Serializable {

  /** Declares the three command-line options of this job; each of them takes a value. */
  override protected def buildOptions(): Options = {
    val options = new Options
    options.addOption("coalesce", true, "[must] coalesce")
    options.addOption("output", true, "[must] output")
    options.addOption("dt_today", true, "[must] dt_today")
    options
  }

  /**
   * Parses the command line and, when the option check passes, runs the export.
   *
   * @param args command-line arguments (`-coalesce`, `-output`, `-dt_today`)
   * @return 0 on success, -1 when `checkMustOption` rejects the command line
   * @throws NumberFormatException if the `coalesce` value is not an integer
   */
  override protected def run(args: Array[String]): Int = {
    val commandLine = commParser.parse(options, args)
    if (!checkMustOption(commandLine)) {
      printUsage(options)
      -1
    } else {
      printOptions(commandLine)
      // Parse the partition count before a Spark session exists so that a
      // malformed value fails without starting any cluster work.
      val partitions = commandLine.getOptionValue("coalesce").toInt
      val output = commandLine.getOptionValue("output")
      val dtToday = commandLine.getOptionValue("dt_today")
      exportDeviceIds(dtToday, output, partitions)
    }
  }

  /**
   * Runs the query and stores one hashed device id per line under `output`.
   *
   * @param dtToday    most recent `dt` value to read, formatted as `yyyyMMdd`
   * @param output     target directory; it is deleted recursively before writing
   * @param partitions number of partitions the result is coalesced into
   * @return always 0; failures surface as exceptions
   */
  private def exportDeviceIds(dtToday: String, output: String, partitions: Int): Int = {
    val spark = SparkSession
      .builder()
      .appName("ReyunLabelTest")
      .config("spark.rdd.compress", "true")
      .config("spark.io.compression.codec", "snappy")
      .config("spark.sql.orc.filterPushdown", "true")
      .config("spark.sql.warehouse.dir", "s3://mob-emr-test/spark-warehouse")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .enableHiveSupport()
      .getOrCreate()

    // Everything after session creation runs inside try/finally so the session
    // is stopped even when the cleanup or the date arithmetic throws.
    try {
      // Resolve the file system from the output location itself instead of a
      // hard-coded bucket, so the cleanup also works for other destinations.
      val outputPath = new Path(output)
      FileSystem
        .get(outputPath.toUri, spark.sparkContext.hadoopConfiguration)
        .delete(outputPath, true)

      val oneDayAgo = DateUtil.getDayByString(dtToday, "yyyyMMdd", -1)
      val twoDaysAgo = DateUtil.getDayByString(dtToday, "yyyyMMdd", -2)

      // NOTE(review): `limit` without `order by` - which rows survive the cap
      // is presumably not deterministic; confirm that this is acceptable.
      val sql1 =
        s"""
           |select md5(dev_id) from dwh.ods_dmp_user_info_daily
           |where dt in ('${dtToday}','${oneDayAgo}','${twoDaysAgo}')
           |and platform='ios' and lower(country)='cn'
           |and (osversion like '14%' or osversion like '15%' or osversion like '16%') limit 120000000
           |""".stripMargin
      println("sql==============" + sql1)

      spark
        .sql(sql1)
        .rdd
        .map(_.mkString)
        .coalesce(partitions)
        .saveAsTextFile(output)
      0
    } finally {
      spark.stop()
    }
  }
}
/** Entry point used by spark-submit. */
object ReyunLabelTest {

  /**
   * Runs the job and propagates a non-zero result as the process exit status.
   *
   * The value returned by `run` used to be discarded, so a rejected command
   * line (-1) still ended the process with status 0.
   *
   * @param args command-line arguments forwarded unchanged to the job
   */
  def main(args: Array[String]): Unit = {
    val exitCode = new ReyunLabelTest().run(args)
    // Only exit explicitly on failure; a successful run returns normally.
    if (exitCode != 0) {
      sys.exit(exitCode)
    }
  }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment