1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
set hive.skewjoin.key=10000;
set hive.optimize.skewjoin=true;
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.merge.mapredfiles=true;
set hive.auto.convert.sortmerge.join=true;
--insert overwrite table tkdm.tkdm_base_device_pay_info partition(ds='2017-02-24')
select nvl(b.appid,a.appid) as appid,
nvl(b.deviceid,a.deviceid) as deviceid,
nvl(b.first_payment_ds,'2017-02-24') as first_payment_ds,
if(isnull(a.deviceid),'2017-02-24',nvl(b.last_payment_ds,'1970-01-01')) as last_payment_ds,
nvl(b.num_payment_cnt,0)+nvl(a.num_payment_cnt,0) as num_payment_cnt,
nvl(b.num_payment_day,0)+nvl(a.num_payment_day,0) as num_payment_day,
round(nvl(b.amt_payment,0) + nvl(a.amt_payment,0),2) as amt_payment,
nvl(a.is_new,0) as is_new_for_last_ds
from
(select appid,
deviceid,
is_new,
amt_payment,
num_payment_cnt,
1 as num_payment_day
from tkdm.tkdm_data_payment_detial_day
where ds='2017-02-24' and
deviceid!='00000000-0000-0000-0000-000000000000'
and deviceid!='' and deviceid!='0'
) a
full join
(select appid,
deviceid,
first_payment_ds,
last_payment_ds,
num_payment_cnt,
num_payment_day,
amt_payment
from tkdm.tkdm_base_device_pay_info
where ds= date_sub('2017-02-24',1)
) b on a.appid=b.appid and a.deviceid=b.deviceid
use tkdm;
create EXTERNAL table tkdm_base_device_pay_info(
appid string,
deviceid string,
first_payment_ds string,
last_payment_ds string,
num_payment_cnt int,
num_payment_day int,
amt_payment double,
is_new_for_last_ds int
)
PARTITIONED BY (ds string)
clustered by (deviceid) sorted by(deviceid,appid) into 32 buckets
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS ORC
location 's3://reyuntkio/warehouse/tkio/tkdm.db/tkdm_base_device_pay_info'
;