USE dwh; SET hive.groupby.skewindata=true; SET hive.mapjoin.smalltable.filesize=25000000; SET mapreduce.map.memory.mb=4096; SET mapreduce.map.java.opts=-Xmx8192m; SET mapreduce.reduce.memory.mb=4096; SET mapreduce.reduce.java.opts=-Xmx8192m; SET hive.mapred.mode=nonstrict; DROP TABLE IF EXISTS package_num; CREATE TABLE package_num AS SELECT package_name,count(1) install_num FROM dwh.dm_install_list_v2 WHERE dt = '${dt}' AND update_date BETWEEN '${start_dt}' AND '${end_dt}' GROUP BY package_name HAVING count(1) > 30000; DROP TABLE IF EXISTS package_info; CREATE TABLE package_info AS SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'IOS' AS platform FROM dwh.dim_app_info_ios WHERE CONCAT(year,month,day) = '${dt}' UNION ALL SELECT package_name,substring(app_name,0,100) AS app_name,category_list,track_view_url,'Android' AS platform FROM dwh.dim_app_info_adr WHERE CONCAT(year,month,day) = '${dt}'; DROP TABLE IF EXISTS package_list; CREATE TABLE package_list AS SELECT /*+ mapjoin(t)*/ p.package_name,p.app_name,p.platform,p.track_view_url,p.category_list,t.install_num FROM (SELECT * FROM package_num WHERE package_name NOT IN (SELECT DISTINCT package_name FROM dwh.dim_package_tags)) t JOIN package_info p ON t.package_name = p.package_name ORDER BY t.install_num DESC LIMIT 5000;