-- tkdm_base_device_pay_info.sql
-- Session settings for the device payment snapshot load.
-- Honor explicit mapjoin hints instead of discarding them.
set hive.ignore.mapjoin.hint=false;
-- Skew join handling: a join key seen in more than 10000 rows is treated as skewed.
set hive.skewjoin.key=10000;
set hive.optimize.skewjoin=true;
-- Bucket-aware input format plus bucketed / sort-merge map join optimizations.
-- NOTE(review): these presumably target the bucketed, sorted snapshot table - confirm they still apply.
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
-- Merge small output files at the end of map-reduce jobs.
set hive.merge.mapredfiles=true;


-- Builds the ds=2017-02-23 partition of the cumulative per-device payment snapshot.
--   a: payments recorded on 2017-02-23, aggregated per (appid, deviceid),
--      excluding placeholder device ids (all-zero uuid, empty string, 0)
--   b: the snapshot partition of the previous day
-- A full outer join keeps every device from either side:
--   only in b -> no payment today, history is carried forward unchanged
--   only in a -> first payment ever, a new row is created
--   in both   -> counters and amounts are accumulated
-- The former mapjoin(a) hint was dropped because Hive rejects hinted map joins on outer joins.
insert overwrite table tkdm.tkdm_base_device_pay_info partition(ds='2017-02-23')
select nvl(b.appid,a.appid) as appid,
       nvl(b.deviceid,a.deviceid) as deviceid,
       nvl(b.first_payment_ds,'2017-02-23') as first_payment_ds,
       if(isnull(a.deviceid),nvl(b.last_payment_ds,'1970-01-01'),'2017-02-23') as last_payment_ds,
       nvl(b.num_payment_cnt,0)+nvl(a.num_payment_cnt,0) as num_payment_cnt,
       nvl(b.num_payment_day,0)+nvl(a.num_payment_day,0) as num_payment_day,
       round(nvl(b.amt_payment,0) + nvl(a.amt_payment,0),2) as amt_payment
from
(select appid,
        deviceid,
        round(sum(amount),2) as amt_payment,
        count(1) as num_payment_cnt,
        1 as num_payment_day
   from tkio.tkio_mid_pay_day
   where ds='2017-02-23' and
         deviceid!='00000000-0000-0000-0000-000000000000'
         and deviceid!='' and deviceid!='0'
   group by appid,deviceid
   distribute by appid ,deviceid
) a
full outer join
(select appid,
        deviceid,
        first_payment_ds,
        last_payment_ds,
        num_payment_cnt,
        num_payment_day,
        amt_payment
    from tkdm.tkdm_base_device_pay_info
    where ds= date_sub('2017-02-23',1)
) b on a.appid=b.appid and a.deviceid=b.deviceid
;



-- DDL for the target table tkdm.tkdm_base_device_pay_info.
-- Per-device payment snapshot table, one partition per day (ds).
-- Created with if not exists so the script can be re-run without failing on an existing table.
use tkdm;


create external table if not exists tkdm_base_device_pay_info(
-- application id and device id identify one row within a partition
appid string,
deviceid string,
-- dates of the first and the most recent payment, stored as strings
first_payment_ds string,
last_payment_ds string,
-- accumulated number of payments and number of days with a payment
num_payment_cnt int,
num_payment_day int,
-- accumulated payment amount, rounded to 2 decimals by the load query
amt_payment double
)
partitioned by (ds string)
clustered by (appid,deviceid) sorted by(appid,deviceid)  into 32 buckets
-- NOTE(review): a field delimiter has presumably no effect on ORC storage - confirm before removing.
row format delimited
fields terminated by '\t'
stored as orc
location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_base_device_pay_info'
;