set mapred.max.split.size=256000000; set mapred.min.split.size.per.node=256000000 set Mapred.min.split.size.per.rack=256000000 set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat set hive.groupby.skewindata=true; insert overwrite table tkdm.tkdm_output_user_cluster_top_all partition(ds='2017-02-25') select '2017-02-25' as dt, cid, category_id, isgame, num_user, top_rank from ( select cid, category_id, isgame, num_user, dense_rank() over(partition by category_id,isgame order by num_user desc ) as top_rank from ( select cid, category_id, isgame, count(1) as num_user from tkdm.tkdm_base_active_payment_info where ds='2017-02-25' and last_ins_date between add_months('2017-02-25',-2) and '2017-02-25' group by cid,category_id,isgame having count(1) >100 cluster by cid,category_id,isgame ) x ) y where top_rank <=100 create EXTERNAL table tkdm_output_user_cluster_top_all( dt string, cid int, category_id int, isgame int, num_user int, top_rank int ) PARTITIONED BY (ds string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS ORC location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_output_user_cluster_top_all' ;