set hive.skewjoin.key=10000; set hive.optimize.skewjoin=true; set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.merge.mapredfiles=true; set hive.auto.convert.sortmerge.join=true; insert overwrite table tkdm.tkdm_base_device_pay_info partition(ds='2017-02-24') select nvl(b.appid,a.appid) as appid, nvl(b.deviceid,a.deviceid) as deviceid, nvl(b.first_payment_ds,'2017-02-24') as first_payment_ds, if(isnull(a.deviceid),'2017-02-24',nvl(b.last_payment_ds,'1970-01-01')) as last_payment_ds, nvl(b.num_payment_cnt,0)+nvl(a.num_payment_cnt,0) as num_payment_cnt, nvl(b.num_payment_day,0)+nvl(a.num_payment_day,0) as num_payment_day, round(nvl(b.amt_payment,0) + nvl(a.amt_payment,0),2) as amt_payment, coalesce(a.ins_datetime,b.ins_datetime,'1970-01-01') as ins_datetime, nvl(a.amt_payment,0) as amt_payment_newest from (select appid, deviceid, ins_datetime, amt_payment, num_payment_cnt, 1 as num_payment_day from tkdm.tkdm_data_payment_detial_day where ds='2017-02-24' and deviceid!='00000000-0000-0000-0000-000000000000' and deviceid!='' and deviceid!='0' ) a full join (select appid, deviceid, first_payment_ds, last_payment_ds, num_payment_cnt, num_payment_day, amt_payment, ins_datetime from tkdm.tkdm_base_device_pay_info where ds= date_sub('2017-02-24',1) ) b on a.appid=b.appid and a.deviceid=b.deviceid cluster by deviceid use tkdm; create EXTERNAL table tkdm_base_device_pay_info( appid string, deviceid string, first_payment_ds string, last_payment_ds string, num_payment_cnt int, num_payment_day int, amt_payment double, ins_datetime string, amt_payment_newest double ) PARTITIONED BY (ds string) clustered by (deviceid) sorted by(deviceid,appid) into 32 buckets ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS ORC location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_base_device_pay_info' ;