use emr_doppler; set hive.exec.compress.output=true; set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; set mapreduce.map.memory.mb=2048; set mapreduce.reduce.memory.mb=3072; DROP TABLE progression_checkpoint_${date_str_undline}; CREATE external TABLE IF NOT EXISTS progression_checkpoint_${date_str_undline} ( game_id int, checkpoint date, player_id string, attempt_num int, score int, count_score int, min_score int, max_score int, sum_score bigint, sum2_score bigint, type string, f1 string, f2 string, f3 string, f4 string )row format delimited fields terminated by '|' location 's3://mob-emr-test/dataplatform/emr/${prefix}/progression_checkpoint/'; DROP TABLE session_checkpoint_${date_str_undline}; CREATE external TABLE IF NOT EXISTS session_checkpoint_${date_str_undline} ( game_id int, checkpoint date, player_id string, start_ts timestamp, end_ts timestamp, num int )row format delimited fields terminated by '|' location 's3://mob-emr-test/dataplatform/emr/${prefix}/session_checkpoint/'; set mapred.reduce.tasks=200; INSERT OVERWRITE table session_checkpoint_${date_str_undline} SELECT game_id, to_date(MAX(arrival_ts)) as checkpoint, player_id AS player_id, MIN(arrival_ts) AS start_ts, MAX(arrival_ts) AS end_ts, MAX(session_num % 2147483647) AS num FROM event_${date_str_undline} WHERE session_id IS NOT NULL GROUP BY game_id, player_id, session_id; set mapred.reduce.tasks=14; -- ALTER TABLE emr_doppler.dim_player_session SET LOCATION 's3://mob-emr-test/dataplatform/emr/dim_player_session'; insert OVERWRITE table dim_player_session SELECT game_id, player_id, COUNT(*) AS count_session_length, MIN(unix_timestamp(end_ts) - unix_timestamp(start_ts)) AS min_session_length, MAX(unix_timestamp(end_ts) - unix_timestamp(start_ts)) AS max_session_length, SUM(unix_timestamp(end_ts) - unix_timestamp(start_ts)) AS sum_session_length, SUM(CAST(unix_timestamp(end_ts) - unix_timestamp(start_ts) AS BIGINT) * CAST(unix_timestamp(end_ts) - unix_timestamp(start_ts) AS BIGINT)) AS sum2_session_length, MIN(num) AS min_session_num, MAX(num) AS max_session_num FROM session_checkpoint_${date_str_undline} GROUP BY game_id, player_id; set mapred.reduce.tasks=200; INSERT OVERWRITE table progression_checkpoint_${date_str_undline} SELECT game_id, to_date(MAX(arrival_ts)) AS checkpoint, player_id AS player_id, MAX(attempt_num) AS attempt_num, AVG(score) AS score, COUNT(score) AS count_score, MIN(score) AS min_score, MAX(score) AS max_score, CASE WHEN ABS(SUM(score)) >= 2147483647 THEN 0 ELSE SUM(score) END AS sum_score, 0 AS sum2_score, split(event_id, ':')[0] AS type, split(event_id, ':')[1] AS f1, split(event_id, ':')[2] AS f2, split(event_id, ':')[3] AS f3, split(event_id, ':')[4] AS f4 FROM event_${date_str_undline} WHERE category = 'progression' AND score >= -2147483648 GROUP BY game_id, player_id, event_id;