USE dwh;
SET hive.groupby.skewindata=true;
SET hive.mapjoin.smalltable.filesize=25000000;
SET mapreduce.map.memory.mb=4096;
SET mapreduce.map.java.opts=-Xmx8192m;
SET mapreduce.reduce.memory.mb=4096;
SET mapreduce.reduce.java.opts=-Xmx8192m;
DROP TABLE IF EXISTS package_num;
CREATE TABLE package_num AS SELECT package_name,count(1) install_num FROM dwh.dm_install_list_v2 WHERE dt = '${dt}' GROUP BY package_name
HAVING count(1) > 30000 ORDER BY install_num DESC LIMIT 5000;
DROP TABLE IF EXISTS package_info;
CREATE TABLE package_info AS
SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'IOS' AS platform
FROM dwh.dim_app_info_ios WHERE CONCAT(year,month,day) = '${dt}'
UNION ALL
SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'Android' AS platform
FROM dwh.dim_app_info_adr WHERE CONCAT(year,month,day) = '${dt}';
DROP TABLE IF EXISTS package_list;
CREATE TABLE package_list AS
SELECT /*+ mapjoin(t)*/ p.package_name,p.app_name,p.platform,p.track_view_url,p.category_list,t.install_num
FROM package_num t
JOIN package_info p
ON t.package_name = p.package_name;