use emr_doppler; set hive.exec.compress.output=true; set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; set mapred.max.split.size=536870912; set mapred.min.split.size.per.node=536870912; set mapred.min.split.size.per.rack=536870912; set mapreduce.map.memory.mb=2048; set mapreduce.reduce.memory.mb=3072; set hive.map.aggr = true; set hive.optimize.skewjoin=true; ALTER TABLE player DROP PARTITION (yyyymmdd='${part_pre1_path}'); -- ALTER TABLE emr_doppler.player SET LOCATION 's3://mob-emr-test/dataplatform/emr/plalyer'; ALTER TABLE player ADD IF NOT EXISTS PARTITION (yyyymmdd='${part_pre1_path}') LOCATION 's3://mob-emr-test/dataplatform/emr/player/${part_pre1_path}'; drop table player_tmp_${date_str_undline}; create table player_tmp_${date_str_undline} as select md5(concat(game_id,user_id)) as player_id,game_id,user_id from event_${date_str_undline} group by player_id,game_id,user_id; insert overwrite table player partition(yyyymmdd='${part_pre1_path}') select x.player_id,x.game_id,user_id from ( select player_id,game_id,user_id from player where yyyymmdd='${part_pre2_path}' and game_id is not null UNION ALL select player_id,game_id,user_id from player_tmp_${date_str_undline} ) x group by x.player_id,x.game_id,x.user_id; drop table player_tmp_${date_str_undline};