Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
AppTag
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dataplatform
AppTag
Commits
d3aa0d49
Commit
d3aa0d49
authored
Nov 15, 2018
by
wangjf
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
mysql2hive
parent
18006e53
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
13 deletions
+19
-13
azkaban.zip
azkaban.zip
+0
-0
app_start.sql
azkaban/app_start.sql
+4
-8
mysql2hive.sh
azkaban/mysql2hive.sh
+15
-5
No files found.
azkaban.zip
View file @
d3aa0d49
No preview for this file type
azkaban/app_start.sql
View file @
d3aa0d49
...
@@ -7,13 +7,8 @@ SET mapreduce.reduce.memory.mb=4096;
...
@@ -7,13 +7,8 @@ SET mapreduce.reduce.memory.mb=4096;
SET
mapreduce
.
reduce
.
java
.
opts
=-
Xmx8192m
;
SET
mapreduce
.
reduce
.
java
.
opts
=-
Xmx8192m
;
DROP
TABLE
IF
EXISTS
package_num
;
DROP
TABLE
IF
EXISTS
package_num
;
CREATE
TABLE
package_num
AS
SELECT
tt
.
package_name
,
tt
.
install_nums
FROM
CREATE
TABLE
package_num
AS
SELECT
package_name
,
count
(
1
)
install_num
FROM
dwh
.
dm_install_list_v2
WHERE
dt
=
'${dt}'
GROUP
BY
package_name
(
SELECT
t1
.
package_name
,(
t1
.
install_num
-
(
CASE
WHEN
t2
.
install_num
IS
NULL
THEN
0
ELSE
t2
.
install_num
END
))
AS
install_nums
HAVING
count
(
1
)
>
30000
ORDER
BY
install_num
DESC
LIMIT
5000
;
FROM
(
SELECT
package_name
,
count
(
1
)
AS
install_num
FROM
dwh
.
dm_install_list_v2
WHERE
dt
=
'${dt}'
GROUP
BY
package_name
)
AS
t1
LEFT
JOIN
(
SELECT
package_name
,
count
(
1
)
AS
install_num
FROM
dwh
.
dm_install_list_v2
WHERE
dt
=
'${last_dt}'
GROUP
BY
package_name
)
AS
t2
ON
t1
.
package_name
=
t2
.
package_name
)
tt
WHERE
tt
.
install_nums
>
30000
;
DROP
TABLE
IF
EXISTS
package_info
;
DROP
TABLE
IF
EXISTS
package_info
;
CREATE
TABLE
package_info
AS
CREATE
TABLE
package_info
AS
...
@@ -25,7 +20,7 @@ CREATE TABLE package_info AS
...
@@ -25,7 +20,7 @@ CREATE TABLE package_info AS
DROP
TABLE
IF
EXISTS
package_list
;
DROP
TABLE
IF
EXISTS
package_list
;
CREATE
TABLE
package_list
AS
CREATE
TABLE
package_list
AS
SELECT
/*+ mapjoin(t)*/
p
.
package_name
,
p
.
app_name
,
p
.
platform
,
p
.
track_view_url
,
p
.
category_list
,
t
.
install_num
s
SELECT
/*+ mapjoin(t)*/
p
.
package_name
,
p
.
app_name
,
p
.
platform
,
p
.
track_view_url
,
p
.
category_list
,
t
.
install_num
FROM
package_num
t
FROM
package_num
t
JOIN
package_info
p
JOIN
package_info
p
ON
t
.
package_name
=
p
.
package_name
;
ON
t
.
package_name
=
p
.
package_name
;
\ No newline at end of file
azkaban/mysql2hive.sh
View file @
d3aa0d49
...
@@ -2,7 +2,17 @@
...
@@ -2,7 +2,17 @@
DT
=
$(
date
+%Y%m%d
-d
"-1 day
$ScheduleTime
"
)
DT
=
$(
date
+%Y%m%d
-d
"-1 day
$ScheduleTime
"
)
sqoop import
--connect
jdbc:mysql://dataplatform-app-tag.c5yzcdreb1xr.us-east-1.rds.amazonaws.com:3306/app_tag
\
sqoop import
"-Dorg.apache.sqoop.splitter.allow_text_splitter=true"
--connect
jdbc:mysql://dataplatform-app-tag.c5yzcdreb1xr.us-east-1.rds.amazonaws.com:3306/app_tag
\
--username
apptag_rw
--password
7gyLEVtkER3u8c9
--table
tag_list
--input-fields-terminated-by
'\t'
--lines-terminated-by
'\n'
\
--username
apptag_rw
--password
7gyLEVtkER3u8c9
--table
tag_result
--columns
'package_name,app_name,platform,feat_id'
-m
1
--null-string
'\\N'
--null-non-string
'\\N'
\
--hive-import
--hive-overwrite
--create-hive-table
--hive-table
dwh.app_tag
--null-string
'\\N'
--null-non-string
'\\N'
\
--target-dir
's3://mob-emr-test/wangjf/data/dwh/app_tag'
--fields-terminated-by
'\001'
--delete-target-dir
--compression-codec
"com.hadoop.compression.lzo.LzopCodec"
\ No newline at end of file
# USE dev;
# CREATE EXTERNAL TABLE app_tag(
# package_name string,
# app_name string,
# platform string,
# category string
# ) ROW FORMAT DELIMITED
# FIELDS TERMINATED BY '\001'
# STORED AS TEXTFILE
# LOCATION 's3://mob-emr-test/wangjf/data/dwh/app_tag';
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment