set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat; set mapred.min.split.size.per.rack=100000000; set mapred.min.split.size.per.node=100000000; set mapred.max.split.size=256000000; set hive.merge.mapfiles = true; set hive.merge.smallfiles.avgsize=16000000; insert overwrite table tkdm.tkdm_data_active_detial_day partition(ds='2017-02-25') select /*+mapjoin(b)*/ a.appid, a.deviceid, a.cid, a.ins_date, nvl(b.category_id,-1) as category_id, nvl(b.isgame,-1) as isgame, nvl(b.platform,'-1') as platform from (select appid, deviceid, cid, to_date(ins_datetime) as ins_date from tkio.tkio_mid_install_day where ds='2017-02-25' ) a left join (select appid, category_id, isgame, platform from tkio_v2.tkio_dp_app_info where ds='2017-02-25' ) b on a.appid=b.appid cluster by deviceid,appid create EXTERNAL table tkdm_data_active_detial_day( appid string, deviceid string, cid int, ins_date string, category_id int, isgame int, platform string ) PARTITIONED BY (ds string) clustered by (deviceid) sorted by(deviceid,appid) into 4 buckets ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS ORC location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_data_active_detial_day' ;