Commit d3aa0d49 by wangjf

mysql2hive

parent 18006e53
No preview for this file type
...@@ -7,13 +7,8 @@ SET mapreduce.reduce.memory.mb=4096; ...@@ -7,13 +7,8 @@ SET mapreduce.reduce.memory.mb=4096;
SET mapreduce.reduce.java.opts=-Xmx8192m; SET mapreduce.reduce.java.opts=-Xmx8192m;
DROP TABLE IF EXISTS package_num; DROP TABLE IF EXISTS package_num;
CREATE TABLE package_num AS SELECT tt.package_name,tt.install_nums FROM CREATE TABLE package_num AS SELECT package_name,count(1) install_num FROM dwh.dm_install_list_v2 WHERE dt = '${dt}' GROUP BY package_name
(SELECT t1.package_name,(t1.install_num - (CASE WHEN t2.install_num IS NULL THEN 0 ELSE t2.install_num END)) AS install_nums HAVING count(1) > 30000 ORDER BY install_num DESC LIMIT 5000;
FROM (SELECT package_name,count(1) AS install_num FROM dwh.dm_install_list_v2 WHERE dt = '${dt}' GROUP BY package_name) AS t1
LEFT JOIN
(SELECT package_name,count(1) AS install_num FROM dwh.dm_install_list_v2 WHERE dt = '${last_dt}' GROUP BY package_name) AS t2
ON t1.package_name = t2.package_name
) tt WHERE tt.install_nums > 30000;
DROP TABLE IF EXISTS package_info; DROP TABLE IF EXISTS package_info;
CREATE TABLE package_info AS CREATE TABLE package_info AS
...@@ -25,7 +20,7 @@ CREATE TABLE package_info AS ...@@ -25,7 +20,7 @@ CREATE TABLE package_info AS
DROP TABLE IF EXISTS package_list; DROP TABLE IF EXISTS package_list;
CREATE TABLE package_list AS CREATE TABLE package_list AS
SELECT /*+ mapjoin(t)*/ p.package_name,p.app_name,p.platform,p.track_view_url,p.category_list,t.install_nums SELECT /*+ mapjoin(t)*/ p.package_name,p.app_name,p.platform,p.track_view_url,p.category_list,t.install_num
FROM package_num t FROM package_num t
JOIN package_info p JOIN package_info p
ON t.package_name = p.package_name; ON t.package_name = p.package_name;
\ No newline at end of file
...@@ -2,7 +2,17 @@ ...@@ -2,7 +2,17 @@
DT=$(date +%Y%m%d -d "-1 day $ScheduleTime") DT=$(date +%Y%m%d -d "-1 day $ScheduleTime")
sqoop import --connect jdbc:mysql://dataplatform-app-tag.c5yzcdreb1xr.us-east-1.rds.amazonaws.com:3306/app_tag \ sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" --connect jdbc:mysql://dataplatform-app-tag.c5yzcdreb1xr.us-east-1.rds.amazonaws.com:3306/app_tag \
--username apptag_rw --password 7gyLEVtkER3u8c9 --table tag_list --input-fields-terminated-by '\t' --lines-terminated-by '\n' \ --username apptag_rw --password 7gyLEVtkER3u8c9 --table tag_result --columns 'package_name,app_name,platform,feat_id' -m 1 --null-string '\\N' --null-non-string '\\N' \
--hive-import --hive-overwrite --create-hive-table --hive-table dwh.app_tag --null-string '\\N' --null-non-string '\\N' \ --target-dir 's3://mob-emr-test/wangjf/data/dwh/app_tag' --fields-terminated-by '\001' --delete-target-dir
--compression-codec "com.hadoop.compression.lzo.LzopCodec"
\ No newline at end of file # USE dev;
# CREATE EXTERNAL TABLE app_tag(
# package_name string,
# app_name string,
# platform string,
# category string
# ) ROW FORMAT DELIMITED
# FIELDS TERMINATED BY '\001'
# STORED AS TEXTFILE
# LOCATION 's3://mob-emr-test/wangjf/data/dwh/app_tag';
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment