player.sql 1.3 KB
Newer Older
WangJinfeng committed
1
use emr_doppler_v1;
wang-jinfeng committed
2 3 4 5 6 7 8 9 10 11 12 13 14

set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set mapred.max.split.size=536870912;
set mapred.min.split.size.per.node=536870912;
set mapred.min.split.size.per.rack=536870912;
set mapreduce.map.memory.mb=2048;
set mapreduce.reduce.memory.mb=3072;
set hive.map.aggr = true;
set hive.optimize.skewjoin=true;

ALTER TABLE player DROP PARTITION (yyyymmdd='${part_pre1_path}');

WangJinfeng committed
15
-- ALTER TABLE emr_doppler_v1.player SET LOCATION 's3://mob-emr-test/dataplatform/emr/plalyer';
wang-jinfeng committed
16 17 18 19 20 21

ALTER TABLE player ADD IF NOT EXISTS PARTITION (yyyymmdd='${part_pre1_path}') LOCATION 's3://mob-emr-test/dataplatform/emr/player/${part_pre1_path}';

drop table player_tmp_${date_str_undline};

create table player_tmp_${date_str_undline} as select md5(concat(game_id,user_id)) as player_id,game_id,user_id
WangJinfeng committed
22
from emr_doppler_v1.event_${date_str_undline}  group by player_id,game_id,user_id;
wang-jinfeng committed
23 24 25 26 27 28 29 30 31

insert overwrite table player partition(yyyymmdd='${part_pre1_path}')
select x.player_id,x.game_id,user_id from (
select player_id,game_id,user_id from player where yyyymmdd='${part_pre2_path}' and game_id is not null
UNION ALL
select player_id,game_id,user_id from player_tmp_${date_str_undline}
) x group by x.player_id,x.game_id,x.user_id;

drop table player_tmp_${date_str_undline};