tkdm_output_active_cid_std_week.sql 1.37 KB
add jar /mnt1/testdata/udf-0.0.1.jar;
CREATE TEMPORARY FUNCTION bootstrapmedian AS 'com.reyun.hive.udaf.BootStrap';

insert overwrite table tkdm.tkdm_output_active_cid_std_week partition(ds='2017-02-22') 


select /*+ mapjoin(x)*/ x.appid,
       '2017-02-22' as week_dt,
       y.cid,
       x.category_id,
       x.isgame,
       nvl(y.avg_ins_rate,-1)  as avg_ins_rate,
       nvl(y.approx_med_ins_rate,-1) as approx_med_ins_rate,
       nvl(y.bootstarp_med_ins_rate,-1) as bootstarp_med_ins_rate

from 

(select appid,category_id,isgame
    from tkio_v2.tkio_dp_app_info
    where ds='2017-02-28' 
) as x

left join 

(select category_id, 
        isgame,
        cid,
        round(avg(ins_rate), 4) avg_ins_rate,
        round(percentile_approx(ins_rate,0.5),4) approx_med_ins_rate,
        round(bootstrapmedian(ins_rate),4) bootstarp_med_ins_rate
    from tkdm.tkdm_data_app_summary_day
    where ds between '2017-02-22' and '2017-02-28'
    group by category_id, isgame,cid
) y on x.category_id=y.category_id and x.isgame=y.isgame




create EXTERNAL table tkdm_output_active_cid_std_week(
appid string,
week_dt string,
cid int,
category_id int,
isgame int,
avg_ins_rate double,
approx_med_ins_rate double,
bootstarp_med_ins_rate double
)
PARTITIONED BY (ds string)
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY '\t'
location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_output_active_cid_std_week'
;