# Patch summary (free-text preamble placed ahead of the "diff --git" header).
#
# File:  azkaban/mysql2hive.sh
# Hunk:  @@ -15,14 +15,16 @@  (14 lines on the old side, 16 on the new side)
#
# What changes:
#   The HiveQL held in the shell variable `sql` used to write the plain
#   UNION ALL of two SELECTs to '${base_dir}/${date_path}'.  The new side wraps
#   that same UNION ALL in a derived table aliased `package_tags` and selects
#   from it with SELECT DISTINCT *, so identical rows are written only once.
#
# The two branches of the UNION ALL (their text is identical on both sides):
#   1. dev.app_tag: SUBSTRING(category, 2, LENGTH(category) - 2) drops the
#      first and last character of `category`; the two REGEXP_REPLACE calls
#      then remove double quotes and spaces; SPLIT on ',' plus
#      LATERAL VIEW EXPLODE yields one `feat_id` per element.  The result is
#      inner-joined to dwh.dm_old2new_tag on feat_id = new_second_id.
#      (Presumably `category` is a bracketed, quoted list - confirm against
#      the table data.)
#   2. The rows already present in dwh.dim_package_tags.
#
# Shell detail: the query sits inside a double-quoted shell string, so the
#   sequence \" reaches Hive as a bare double-quote character.
#
# Unchanged context: the base_dir assignment, the closing quote of `sql`, and
#   the final `hive -e "${sql}"` invocation.  `date_path` is assigned outside
#   this hunk.
#
# NOTE(review): in this copy the whole patch is on a single line, while the
#   hunk header promises 14/16 lines - the line breaks look collapsed to
#   spaces.  The inline " - " / " + " markers are presumably the per-line
#   removed/added prefixes; restore the breaks before trying to apply it.
# NOTE(review): SELECT DISTINCT * over a UNION ALL should be expressible as a
#   plain UNION (UNION DISTINCT) of the two branches - confirm the Hive
#   version in use supports it before simplifying.
diff --git a/azkaban/mysql2hive.sh b/azkaban/mysql2hive.sh index 1836ebf..15ab792 100644 --- a/azkaban/mysql2hive.sh +++ b/azkaban/mysql2hive.sh @@ -15,14 +15,16 @@ sqoop import "-Dorg.apache.sqoop.splitter.allow_text_splitter=true" --connect jd base_dir="s3://mob-emr-test/dataplatform/DataWareHouse/data/dwh/dim_package_tags" sql="INSERT OVERWRITE DIRECTORY '${base_dir}/${date_path}' - SELECT a.package_name,a.platform,b.tag_type,b.first_tag,b.second_tag,b.comment_cn - FROM (SELECT package_name,lower(platform) platform,feat_id - FROM dev.app_tag - LATERAL VIEW EXPLODE(SPLIT(REGEXP_REPLACE(REGEXP_REPLACE(SUBSTRING(category,2,LENGTH(category) - 2),'\"',''),' ',''),',')) featTable AS feat_id) a - INNER JOIN dwh.dm_old2new_tag b ON a.feat_id = b.new_second_id - UNION ALL - SELECT package_name,platform,tag_type,first_tag,second_tag,comment - FROM dwh.dim_package_tags + SELECT DISTINCT * FROM + (SELECT a.package_name,a.platform,b.tag_type,b.first_tag,b.second_tag,b.comment_cn + FROM (SELECT package_name,lower(platform) platform,feat_id + FROM dev.app_tag + LATERAL VIEW EXPLODE(SPLIT(REGEXP_REPLACE(REGEXP_REPLACE(SUBSTRING(category,2,LENGTH(category) - 2),'\"',''),' ',''),',')) featTable AS feat_id) a + INNER JOIN dwh.dm_old2new_tag b ON a.feat_id = b.new_second_id + UNION ALL + SELECT package_name,platform,tag_type,first_tag,second_tag,comment + FROM dwh.dim_package_tags + ) package_tags " hive -e "${sql}"