Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mobvista-dmp
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
王金锋
mobvista-dmp
Commits
1076083f
Commit
1076083f
authored
Sep 13, 2021
by
WangJinfeng
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix no file on joypac logs
parent
c08cf707
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
30 additions
and
10 deletions
+30
-10
joypac_result_etl.sh
azkaban/joypac/joypac_result_etl.sh
+1
-0
JoypacResultEtl.scala
...cala/mobvista/dmp/datasource/joypac/JoypacResultEtl.scala
+16
-8
JoypcSdkDaily.scala
.../scala/mobvista/dmp/datasource/joypac/JoypcSdkDaily.scala
+13
-2
No files found.
azkaban/joypac/joypac_result_etl.sh
View file @
1076083f
...
...
@@ -25,6 +25,7 @@ spark-submit --class mobvista.dmp.datasource.joypac.JoypacResultEtl \
--conf
spark.yarn.executor.memoryOverhead
=
2048
\
--conf
spark.network.timeout
=
720s
\
--conf
spark.default.parallelism
=
20
\
--conf
spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive
=
true
\
--master
yarn
--deploy-mode
cluster
--name
JoypacResutlEtl
\
--executor-memory
4g
--driver-memory
4g
--executor-cores
2
--num-executors
5
\
--files
${
HIVE_SITE_PATH
}
\
...
...
src/main/scala/mobvista/dmp/datasource/joypac/JoypacResultEtl.scala
View file @
1076083f
package
mobvista.dmp.datasource.joypac
import
java.net.URI
import
com.google.gson.JsonObject
import
mobvista.dmp.common.CommonSparkJob
import
mobvista.dmp.datasource.dm.Constant.
{
allZero
,
andriodIdPtn
,
didPtn
,
imeiPtn
}
...
...
@@ -14,6 +12,8 @@ import org.apache.hadoop.fs.{FileSystem, Path}
import
org.apache.spark.sql.
{
SaveMode
,
SparkSession
}
import
ru.yandex.clickhouse.ClickHouseDataSource
import
java.net.URI
/**
* @package: mobvista.dmp.datasource.joypac
* @author: wangjf
...
...
@@ -123,12 +123,20 @@ class JoypacResultEtl extends CommonSparkJob {
j
!=
null
})
df
.
toDF
.
dropDuplicates
()
.
coalesce
(
Integer
.
parseInt
(
coalesce
)).
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
)
if
(
df
.
count
()
>
1
)
{
df
.
toDF
.
dropDuplicates
()
.
coalesce
(
Integer
.
parseInt
(
coalesce
)).
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
)
}
else
{
Seq
.
empty
[
JoypacEntity
].
toDF
.
coalesce
(
1
).
write
.
mode
(
SaveMode
.
Overwrite
)
.
option
(
"orc.compress"
,
"zlib"
)
.
orc
(
output
)
}
import
mobvista.dmp.utils.clickhouse.ClickHouseSparkExt._
val
clusterName
=
Some
(
cluster
)
:
Option
[
String
]
...
...
src/main/scala/mobvista/dmp/datasource/joypac/JoypcSdkDaily.scala
View file @
1076083f
...
...
@@ -7,6 +7,10 @@ import org.apache.spark.sql.{SparkSession, _};
* 刘凯 2019-02-18 15:20
* joypc_sdk fluentd数据接入至etl_joypc_sdk_daily表
*/
case
class
JoypcSdkDaily
(
id
:
String
,
idfa
:
String
,
app_version
:
String
,
brand
:
String
,
network_type
:
String
,
package_name
:
String
,
platform
:
String
,
language
:
String
,
os_version
:
String
,
app_version_code
:
String
,
model
:
String
,
time_zone
:
String
,
apps_info
:
String
,
time
:
String
)
object
JoypcSdkDaily
extends
Serializable
{
def
main
(
args
:
Array
[
String
])
{
val
spark
=
SparkSession
.
builder
()
...
...
@@ -16,7 +20,7 @@ object JoypcSdkDaily extends Serializable {
var
year
=
loadTime
.
substring
(
0
,
4
)
var
month
=
loadTime
.
substring
(
4
,
6
)
val
day
=
loadTime
.
substring
(
6
,
8
)
val
input_path
=
spark
.
conf
.
get
(
"spark.app.input_path"
)
val
input_path
=
spark
.
conf
.
get
(
"spark.app.input_path"
)
.
replace
(
"*"
,
""
)
val
output_path
=
spark
.
conf
.
get
(
"spark.app.output_path"
)
try
{
val
log_rdd
=
spark
.
sparkContext
...
...
@@ -98,7 +102,14 @@ object JoypcSdkDaily extends Serializable {
StructField
(
"time"
,
StringType
)
))
var
joypc_df
=
spark
.
createDataFrame
(
cal_rdd
,
joypc_schema
)
joypc_df
.
coalesce
(
100
).
write
.
format
(
"orc"
).
mode
(
"overwrite"
).
save
(
output_path
)
if
(
joypc_df
.
count
()
>
1
)
{
joypc_df
.
coalesce
(
100
).
write
.
format
(
"orc"
).
mode
(
"overwrite"
).
save
(
output_path
)
}
else
{
import
spark.implicits._
Seq
.
empty
[
JoypcSdkDaily
].
toDF
.
coalesce
(
1
).
write
.
format
(
"orc"
).
mode
(
"overwrite"
).
save
(
output_path
)
}
}
catch
{
case
e
:
Exception
=>
e
.
printStackTrace
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment