app_start.sql 1.35 KB
Newer Older
wangjf committed
1 2 3 4 5 6 7
USE dwh;
SET hive.groupby.skewindata=true;
SET hive.mapjoin.smalltable.filesize=25000000;
SET mapreduce.map.memory.mb=4096;
SET mapreduce.map.java.opts=-Xmx8192m;
SET mapreduce.reduce.memory.mb=4096;
SET mapreduce.reduce.java.opts=-Xmx8192m;
wangjf committed
8
SET hive.mapred.mode=nonstrict;
wangjf committed
9 10

DROP TABLE IF EXISTS package_num;
wangjf committed
11
CREATE TABLE package_num AS SELECT package_name,count(1) install_num FROM dwh.dm_install_list_v2
wangjf committed
12 13
    WHERE dt = '${dt}' AND update_date BETWEEN '${start_dt}' AND '${end_dt}'
    GROUP BY package_name HAVING count(1) > 30000;
wangjf committed
14

wangjf committed
15
DROP TABLE IF EXISTS package_info;
wangjf committed
16
CREATE TABLE package_info AS 
wangjf committed
17
    SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'IOS' AS platform 
wangjf committed
18 19
    FROM dwh.dim_app_info_ios WHERE CONCAT(year,month,day) = '${dt}' 
    UNION ALL 
wangjf committed
20
    SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'Android' AS platform 
wangjf committed
21 22
    FROM dwh.dim_app_info_adr WHERE CONCAT(year,month,day) = '${dt}';

wangjf committed
23 24
DROP TABLE IF EXISTS package_list;
CREATE TABLE package_list AS 
wangjf committed
25
    SELECT /*+ mapjoin(t)*/ p.package_name,p.app_name,p.platform,p.track_view_url,p.category_list,t.install_num
wangjf committed
26
    FROM (SELECT * FROM package_num WHERE package_name NOT IN (SELECT DISTINCT package_name FROM dwh.dim_package_tags)) t
wangjf committed
27
    JOIN package_info p 
wangjf committed
28
    ON t.package_name = p.package_name ORDER BY t.install_num DESC LIMIT 5000;