set hive.skewjoin.key=10000; set hive.optimize.skewjoin=true; set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.merge.mapredfiles=true; set hive.auto.convert.sortmerge.join=true; insert overwrite table tkdm.tkdm_base_active_payment_info partition(ds='2017-02-25') select x.appid, x.deviceid, x.cid, x.first_ins_date, x.last_ins_date, x.category_id, x.isgame, x.platform, nvl(x.first_payment_ds,y.payment_ds) as first_payment_ds, nvl(y.payment_ds,x.last_amt_payment) as last_payment_ds, x.num_payment_cnt + y.num_payment_cnt as num_payment_cnt, x.num_payment_day + y.num_payment_day as num_payment_day, x.amt_payment +y.amt_payment as amt_payment, nvl(y.amt_payment,x.last_amt_payment) as last_amt_payment from (select nvl(a.deviceid,b.deviceid) as deviceid, nvl(a.appid,b.appid) as appid, nvl(b.cid,a.cid) as cid, nvl(a.first_ins_date,b.ins_date) as first_ins_date, nvl(b.ins_date,a.last_ins_date) as last_ins_date, nvl(b.category_id,a.category_id) as category_id, nvl(b.isgame,a.isgame) as isgame, nvl(b.platform,a.platform) as platform, a.first_payment_ds, a.last_payment_ds, nvl(a.num_payment_cnt,0) as num_payment_cnt, nvl(a.num_payment_day,0) as num_payment_day, nvl(a.amt_payment,0) amt_payment, nvl(a.last_amt_payment,0) as last_amt_payment from (select deviceid, appid, cid, first_ins_date, last_ins_date, category_id, isgame, platform, first_payment_ds, last_payment_ds, num_payment_cnt, num_payment_day, amt_payment, last_amt_payment from tkdm.tkdm_base_active_payment_info where ds=date_sub('2017-02-25',1) ) a full join (select deviceid, appid, cid, ins_date, category_id, isgame, platform from tkdm.tkdm_data_active_detial_day where ds='2017-02-25' ) b on a.deviceid=b.deviceid and a.appid=b.appid ) x left join (select deviceid, appid, to_date(ins_datetime) as ins_date, '2017-02-25' as payment_ds, num_payment_cnt, 1 as num_payment_day, amt_payment from tkdm.tkdm_data_payment_detial_day where ds='2017-02-25' ) y on x.deviceid=y.deviceid and x.appid=y.appid cluster by deviceid,appid create EXTERNAL table tkdm_base_active_payment_info( appid string, deviceid string, cid int, first_ins_date string, last_ins_date string, category_id int, isgame int, platform string, first_payment_ds string, last_payment_ds string, num_payment_cnt int, num_payment_day int, amt_payment double, last_amt_payment double ) PARTITIONED BY (ds string) clustered by (deviceid) sorted by(deviceid,appid) into 32 buckets ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS ORC location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_base_active_payment_info' ;