Commit 02a7d694 by mengdongxing

Delete tkdm_output_user_cluster_top_day.sql

parent add57431
set mapred.max.split.size=256000000;
set mapred.min.split.size.per.node=256000000
set Mapred.min.split.size.per.rack=256000000
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat
set hive.groupby.skewindata=true;
insert overwrite table tkdm.tkdm_output_user_cluster_top_all partition(ds='2017-02-25')
select '2017-02-25' as dt,
cid,
category_id,
isgame,
num_user,
top_rank
from (
select cid,
category_id,
isgame,
num_user,
dense_rank() over(partition by category_id,isgame order by num_user desc ) as top_rank
from (
select cid,
category_id,
isgame,
count(1) as num_user
from tkdm.tkdm_base_active_payment_info
where ds='2017-02-25' and
last_ins_date between add_months('2017-02-25',-2) and '2017-02-25'
group by cid,category_id,isgame
having count(1) >100
cluster by cid,category_id,isgame
) x
) y
where top_rank <=100
create EXTERNAL table tkdm_output_user_cluster_top_all(
dt string,
cid int,
category_id int,
isgame int,
num_user int,
top_rank int
)
PARTITIONED BY (ds string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS ORC
location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_output_user_cluster_top_all'
;
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment