USE dwh;
SET hive.groupby.skewindata=true;
SET hive.mapjoin.smalltable.filesize=25000000;
SET mapreduce.map.memory.mb=4096;
SET mapreduce.map.java.opts=-Xmx8192m;
SET mapreduce.reduce.memory.mb=4096;
SET mapreduce.reduce.java.opts=-Xmx8192m;
SET hive.mapred.mode=nonstrict;

DROP TABLE IF EXISTS package_num;
CREATE TABLE package_num AS SELECT package_name,count(1) install_num FROM dwh.dm_install_list_v2
    WHERE dt = '${dt}' AND update_date BETWEEN '${start_dt}' AND '${end_dt}'
    GROUP BY package_name HAVING count(1) > 30000;

DROP TABLE IF EXISTS package_info;
CREATE TABLE package_info AS 
    SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'IOS' AS platform 
    FROM dwh.dim_app_info_ios WHERE CONCAT(year,month,day) = '${dt}' 
    UNION ALL 
    SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'Android' AS platform 
    FROM dwh.dim_app_info_adr WHERE CONCAT(year,month,day) = '${dt}';

DROP TABLE IF EXISTS package_list;
CREATE TABLE package_list AS 
    SELECT /*+ mapjoin(t)*/ p.package_name,p.app_name,p.platform,p.track_view_url,p.category_list,t.install_num
    FROM (SELECT * FROM package_num WHERE package_name NOT IN (SELECT DISTINCT package_name FROM dwh.dim_package_tags)) t
    JOIN package_info p 
    ON t.package_name = p.package_name ORDER BY t.install_num DESC LIMIT 5000;