use emr_doppler;

set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set mapred.max.split.size=536870912;
set mapred.min.split.size.per.node=536870912;
set mapred.min.split.size.per.rack=536870912;
set mapreduce.map.memory.mb=2048;
set mapreduce.reduce.memory.mb=3072;
set hive.map.aggr = true;
set hive.optimize.skewjoin=true;

ALTER TABLE player DROP PARTITION (yyyymmdd='${part_pre1_path}');

-- ALTER TABLE emr_doppler.player SET LOCATION 's3://mob-emr-test/dataplatform/emr/plalyer';

ALTER TABLE player ADD IF NOT EXISTS PARTITION (yyyymmdd='${part_pre1_path}') LOCATION 's3://mob-emr-test/dataplatform/emr/player/${part_pre1_path}';

drop table player_tmp_${date_str_undline};

create table player_tmp_${date_str_undline} as select md5(concat(game_id,user_id)) as player_id,game_id,user_id
from event_${date_str_undline}  group by player_id,game_id,user_id;

insert overwrite table player partition(yyyymmdd='${part_pre1_path}')
select x.player_id,x.game_id,user_id from (
select player_id,game_id,user_id from player where yyyymmdd='${part_pre2_path}' and game_id is not null
UNION ALL
select player_id,game_id,user_id from player_tmp_${date_str_undline}
) x group by x.player_id,x.game_id,x.user_id;

drop table player_tmp_${date_str_undline};