#!/usr/bash

today=${ScheduleTime:-$1}
date_path=$(date +%Y/%m/%d -d "-1 day $today")

java -jar -Dloader.main=com.mobvista.apptag.utils.StorageTag apptag.jar
if [[ $? -ne 0 ]]; then
    exit 255
fi

# sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" --connect jdbc:mysql://dataplatform-app-tag.c5yzcdreb1xr.us-east-1.rds.amazonaws.com:3306/app_tag  \
#     --username apptag_rw --password 7gyLEVtkER3u8c9 --table tag_result --columns 'package_name,app_name,platform,feat_id' -m 1 --null-string '\\N' --null-non-string '\\N' \
#     --target-dir 's3://mob-emr-test/wangjf/data/dwh/app_tag' --fields-terminated-by '\001' --delete-target-dir

# sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" --connect jdbc:mysql://dataplatform-app-tag.c5yzcdreb1xr.us-east-1.rds.amazonaws.com:3306/app_tag  \
#     --username apptag_rw --password 7gyLEVtkER3u8c9 --query 'select package_name,app_name,lower(platform),json_keys(feat_id) from tag_result where $CONDITIONS and feat_id is not null' -m 1 --null-string '\\N' --null-non-string '\\N' \
#     --target-dir 's3://mob-emr-test/wangjf/data/dwh/app_tag' --fields-terminated-by '\001' --delete-target-dir

sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" --connect jdbc:mysql://dataplatform-app-tag.c5yzcdreb1xr.us-east-1.rds.amazonaws.com:3306/app_tag  \
    --username apptag_rw --password 7gyLEVtkER3u8c9 --query 'select package_name,app_name,lower(platform),json_keys(tag) from storage where $CONDITIONS and tag is not null' -m 1 --null-string '\\N' --null-non-string '\\N' \
    --target-dir 's3://mob-emr-test/wangjf/data/dwh/app_tag' --fields-terminated-by '\001' --delete-target-dir

if [[ $? -ne 0 ]]; then
    exit 255
fi

base_dir="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_package_tags"

sql="INSERT OVERWRITE DIRECTORY '${base_dir}/${date_path}'
        ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
        SELECT package_name, platform, tag_type,first_tag, second_tag, CONCAT_WS(',',COLLECT_SET(comment)) comment FROM
            (SELECT a.package_name,a.platform,b.tag_type,b.first_tag,b.second_tag,b.comment_cn comment
                FROM (SELECT package_name,lower(platform) platform,feat_id
                    FROM dev.app_tag
                    LATERAL VIEW EXPLODE(SPLIT(REGEXP_REPLACE(REGEXP_REPLACE(SUBSTRING(category,2,LENGTH(category) - 2),'\"',''),' ',''),',')) featTable AS feat_id) a
                INNER JOIN dwh.dm_old2new_tag b ON a.feat_id = b.new_second_id
            UNION ALL
            SELECT c.package_name,c.platform,c.tag_type,c.first_tag,c.second_tag,c.comment
            FROM dwh.dim_package_tags c LEFT JOIN dev.app_tag d ON LOWER(c.package_name) = LOWER(d.package_name)
            WHERE d.package_name IS NULL
        ) package_tags GROUP BY package_name, platform, tag_type,first_tag, second_tag
    "

hive -e "${sql}"
if [[ $? -ne 0 ]]; then
    exit 255
fi

hive -e "use dwh;alter table dim_package_tags set location '${base_dir}/${date_path}';"