tkdm_data_active_detial_day.sql 1.29 KB
Newer Older
mengdongxing committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;  
set mapred.min.split.size.per.rack=100000000;  
set mapred.min.split.size.per.node=100000000;  
set mapred.max.split.size=256000000;  
set hive.merge.mapfiles = true;
set hive.merge.smallfiles.avgsize=16000000;


insert overwrite table tkdm.tkdm_data_active_detial_day partition(ds='2017-02-25')
select /*+mapjoin(b)*/ a.appid,
       a.deviceid,
       a.cid,
       a.ins_date,
       nvl(b.category_id,-1) as category_id,
       nvl(b.isgame,-1) as isgame,
       nvl(b.platform,'-1') as platform
from 

(select appid,
        deviceid,
        cid,
        to_date(ins_datetime) as ins_date
    from tkio.tkio_mid_install_day
    where ds='2017-02-25'
) a 
left join

(select appid,
        category_id,
        isgame,
        platform
    from tkio_v2.tkio_dp_app_info
    where ds='2017-02-25'
) b on a.appid=b.appid
35
cluster by deviceid,appid
mengdongxing committed
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53


create EXTERNAL table tkdm_data_active_detial_day(
appid string,
deviceid string,
cid int,
ins_date string,
category_id int,
isgame int,
platform string
)
PARTITIONED BY (ds string)
clustered by (deviceid) sorted by(deviceid,appid)  into 4 buckets
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY '\t'
STORED AS ORC
location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_data_active_detial_day'
;