1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
set hive.skewjoin.key=10000;
set hive.optimize.skewjoin=true;
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.merge.mapredfiles=true;
set hive.auto.convert.sortmerge.join=true;
insert overwrite table tkdm.tkdm_base_active_payment_info partition(ds='2017-02-25')
select x.appid,
x.deviceid,
x.cid,
x.first_ins_date,
x.last_ins_date,
x.category_id,
x.isgame,
x.platform,
nvl(x.first_payment_ds,y.payment_ds) as first_payment_ds,
nvl(y.payment_ds,x.last_amt_payment) as last_payment_ds,
x.num_payment_cnt + y.num_payment_cnt as num_payment_cnt,
x.num_payment_day + y.num_payment_day as num_payment_day,
x.amt_payment +y.amt_payment as amt_payment,
nvl(y.amt_payment,x.last_amt_payment) as last_amt_payment
from
(select nvl(a.deviceid,b.deviceid) as deviceid,
nvl(a.appid,b.appid) as appid,
nvl(b.cid,a.cid) as cid,
nvl(a.first_ins_date,b.ins_date) as first_ins_date,
nvl(b.ins_date,a.last_ins_date) as last_ins_date,
nvl(b.category_id,a.category_id) as category_id,
nvl(b.isgame,a.isgame) as isgame,
nvl(b.platform,a.platform) as platform,
a.first_payment_ds,
a.last_payment_ds,
nvl(a.num_payment_cnt,0) as num_payment_cnt,
nvl(a.num_payment_day,0) as num_payment_day,
nvl(a.amt_payment,0) amt_payment,
nvl(a.last_amt_payment,0) as last_amt_payment
from
(select deviceid,
appid,
cid,
first_ins_date,
last_ins_date,
category_id,
isgame,
platform,
first_payment_ds,
last_payment_ds,
num_payment_cnt,
num_payment_day,
amt_payment,
last_amt_payment
from tkdm.tkdm_base_active_payment_info
where ds=date_sub('2017-02-25',1)
) a
full join
(select deviceid,
appid,
cid,
ins_date,
category_id,
isgame,
platform
from tkdm.tkdm_data_active_detial_day
where ds='2017-02-25'
) b on a.deviceid=b.deviceid and a.appid=b.appid
) x
left join
(select deviceid,
appid,
to_date(ins_datetime) as ins_date,
'2017-02-25' as payment_ds,
num_payment_cnt,
1 as num_payment_day,
amt_payment
from tkdm.tkdm_data_payment_detial_day
where ds='2017-02-25'
) y on x.deviceid=y.deviceid and x.appid=y.appid
cluster by deviceid,appid
create EXTERNAL table tkdm_base_active_payment_info(
appid string,
deviceid string,
cid int,
first_ins_date string,
last_ins_date string,
category_id int,
isgame int,
platform string,
first_payment_ds string,
last_payment_ds string,
num_payment_cnt int,
num_payment_day int,
amt_payment double,
last_amt_payment double
)
PARTITIONED BY (ds string)
clustered by (deviceid) sorted by(deviceid,appid) into 32 buckets
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS ORC
location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_base_active_payment_info'
;