tkdm_output_reten_app_std_week.sql 1.51 KB
Newer Older
mengdongxing committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
add jar /mnt1/testdata/udf-0.0.1.jar;
CREATE TEMPORARY FUNCTION bootstrapmedian AS 'com.reyun.hive.udaf.BootStrap';

insert overwrite table tkdm.tkdm_output_reten_app_std_week partition(ds='2017-02-22') 

select /*+ mapjoin(x)*/ x.appid,
       x.category_id,
       x.isgame,
       nvl(y.avg_d1_rate,-1)  as avg_ins_rate,
       nvl(y.approx_med_d1_rate,-1) as approx_med_ins_rate,
       nvl(y.bootstarp_med_d1_rate,-1) as bootstarp_med_ins_rate
from 
(select category_id, 
       isgame,
       round(avg(d1_rate), 4) avg_d1_rate,
       round(percentile_approx(d1_rate,0.5),4) approx_med_d1_rate,
       round(bootstrapmedian(d1_rate),4) bootstarp_med_d1_rate
    from (
       select  appid,
               category_id,
               isgame ,
               ds,
               round(sum(d1_user) / sum(num_user),4) as d1_rate
24
           from tkdm.tkdm_data_retention_summary_day
mengdongxing committed
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
           where ds between '2017-02-22' and '2017-02-28'
           group by appid,category_id,isgame,ds
       ) as tt 
    group by category_id, isgame
) y 

right join 

(select appid,category_id,isgame
    from tkio_v2.tkio_dp_app_info
    where ds='2017-02-28' 
) as x on x.category_id=y.category_id and x.isgame=y.isgame


create EXTERNAL table tkdm_output_reten_app_std_week(
appid string,
category_id int,
isgame int,
avg_d1_rate double,
approx_d1_ins_rate double,
bootstarp_d1_ins_rate double
)
PARTITIONED BY (ds string)
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY '\t'
location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_output_reten_app_std_week'
;