use emr_doppler;
set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
set mapreduce.map.memory.mb=2048;
set mapreduce.reduce.memory.mb=3072;

DROP TABLE progression_checkpoint_${date_str_undline};

CREATE external TABLE IF NOT EXISTS progression_checkpoint_${date_str_undline}
(
    game_id     int,
    checkpoint  date,
    player_id   string,
    attempt_num int,
    score       int,
    count_score int,
    min_score   int,
    max_score   int,
    sum_score   bigint,
    sum2_score  bigint,
    type        string,
    f1          string,
    f2          string,
    f3          string,
    f4          string
)row format delimited fields terminated by '|'
    location 's3://mob-emr-test/dataplatform/emr/${prefix}/progression_checkpoint/';

DROP TABLE session_checkpoint_${date_str_undline};
CREATE external TABLE IF NOT EXISTS session_checkpoint_${date_str_undline}
(
    game_id    int,
    checkpoint date,
    player_id  string,
    start_ts   timestamp,
    end_ts     timestamp,
    num        int
)row format delimited fields terminated by '|'
    location 's3://mob-emr-test/dataplatform/emr/${prefix}/session_checkpoint/';

set mapred.reduce.tasks=200;

INSERT OVERWRITE table session_checkpoint_${date_str_undline}
SELECT game_id,
       to_date(MAX(arrival_ts))      as checkpoint,
       player_id                     AS player_id,
       MIN(arrival_ts)               AS start_ts,
       MAX(arrival_ts)               AS end_ts,
       MAX(session_num % 2147483647) AS num
FROM event_${date_str_undline}
WHERE session_id IS NOT NULL
GROUP BY game_id, player_id, session_id;

set mapred.reduce.tasks=14;

-- ALTER TABLE emr_doppler.dim_player_session SET LOCATION 's3://mob-emr-test/dataplatform/emr/dim_player_session';

insert OVERWRITE table dim_player_session
SELECT game_id,
       player_id,
       COUNT(*)                                                               AS count_session_length,
       MIN(unix_timestamp(end_ts) - unix_timestamp(start_ts))                 AS min_session_length,
       MAX(unix_timestamp(end_ts) - unix_timestamp(start_ts))                 AS max_session_length,
       SUM(unix_timestamp(end_ts) - unix_timestamp(start_ts))                 AS sum_session_length,
       SUM(CAST(unix_timestamp(end_ts) - unix_timestamp(start_ts) AS BIGINT) *
           CAST(unix_timestamp(end_ts) - unix_timestamp(start_ts) AS BIGINT)) AS sum2_session_length,
       MIN(num)                                                               AS min_session_num,
       MAX(num)                                                               AS max_session_num
FROM session_checkpoint_${date_str_undline}
GROUP BY game_id, player_id;

set mapred.reduce.tasks=200;

INSERT OVERWRITE table progression_checkpoint_${date_str_undline}
SELECT game_id,
       to_date(MAX(arrival_ts)) AS checkpoint,
       player_id                AS player_id,
       MAX(attempt_num)         AS attempt_num,
       AVG(score)               AS score,
       COUNT(score)             AS count_score,
       MIN(score)               AS min_score,
       MAX(score)               AS max_score,
       CASE
           WHEN ABS(SUM(score)) >= 2147483647 THEN 0
           ELSE SUM(score) END  AS sum_score,
       0                        AS sum2_score,
       split(event_id, ':')[0]  AS type,
       split(event_id, ':')[1]  AS f1,
       split(event_id, ':')[2]  AS f2,
       split(event_id, ':')[3]  AS f3,
       split(event_id, ':')[4]  AS f4
FROM event_${date_str_undline}
WHERE category = 'progression'
  AND score >= -2147483648
GROUP BY game_id, player_id, event_id;