-- Daily load of tkdm.tkdm_output_user_cluster_app_day for ds = '2017-02-25'.
--
-- Writes one row per (appid, category_id, isgame) combination found in
-- tkdm.tkdm_data_active_detial_day for that ds, with num_user holding the
-- number of source rows in the group (count(1)).
-- NOTE(review): the run date is hard-coded in three places below (partition
-- spec, dt literal, source filter); keep them in sync when re-running.

-- Session settings. Each command needs its own terminating ';', otherwise the
-- Hive CLI treats the following text as part of the previous property value.
SET mapred.max.split.size=256000000;
SET mapred.min.split.size.per.node=256000000;
-- Property names are case-sensitive: the key is all lower-case.
SET mapred.min.split.size.per.rack=256000000;
SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
SET hive.groupby.skewindata=true;

-- Target table definition. Placed before the load and guarded with
-- IF NOT EXISTS so the script can be run repeatedly. The name is qualified
-- with the tkdm database to match the INSERT target and the LOCATION below.
-- NOTE(review): ROW FORMAT DELIMITED is kept from the original definition;
-- confirm whether it is needed together with STORED AS ORC.
CREATE EXTERNAL TABLE IF NOT EXISTS tkdm.tkdm_output_user_cluster_app_day (
    dt          STRING,
    appid       INT,
    category_id INT,
    isgame      INT,
    num_user    INT
)
PARTITIONED BY (ds STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS ORC
LOCATION 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_output_user_cluster_app_day';

-- Rebuild the partition for the run date from the active-detail table.
INSERT OVERWRITE TABLE tkdm.tkdm_output_user_cluster_app_day
PARTITION (ds = '2017-02-25')
SELECT
    '2017-02-25' AS dt,
    appid,
    category_id,
    isgame,
    COUNT(1)     AS num_user
FROM tkdm.tkdm_data_active_detial_day
WHERE ds = '2017-02-25'
GROUP BY
    appid,
    category_id,
    isgame
CLUSTER BY
    appid,
    category_id,
    isgame;