app_start.sql 1.16 KB
USE dwh;
SET hive.groupby.skewindata=true;
SET hive.mapjoin.smalltable.filesize=25000000;
SET mapreduce.map.memory.mb=4096;
SET mapreduce.map.java.opts=-Xmx8192m;
SET mapreduce.reduce.memory.mb=4096;
SET mapreduce.reduce.java.opts=-Xmx8192m;

DROP TABLE IF EXISTS package_num;
CREATE TABLE package_num AS SELECT package_name,count(1) install_num FROM dwh.dm_install_list_v2 WHERE dt = '${dt}' GROUP BY package_name
    HAVING count(1) > 30000 ORDER BY install_num DESC LIMIT 5000;

DROP TABLE IF EXISTS package_info;
CREATE TABLE package_info AS 
    SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'IOS' AS platform 
    FROM dwh.dim_app_info_ios WHERE CONCAT(year,month,day) = '${dt}' 
    UNION ALL 
    SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'Android' AS platform 
    FROM dwh.dim_app_info_adr WHERE CONCAT(year,month,day) = '${dt}';

DROP TABLE IF EXISTS package_list;
CREATE TABLE package_list AS 
    SELECT /*+ mapjoin(t)*/ p.package_name,p.app_name,p.platform,p.track_view_url,p.category_list,t.install_num
    FROM package_num t 
    JOIN package_info p 
    ON t.package_name = p.package_name;