Constant.scala 5.96 KB
package mobvista.dmp.datasource.device

import java.text.SimpleDateFormat
import java.util
import java.util.regex.Pattern

import com.google.common.collect.Sets
import org.apache.spark.sql.types.{StringType, StructField, StructType}

/**
  * @package: mobvista.dmp.datasource.device
  * @author: wangjf
  * @create: 2018-08-28 16:50
  **/
object Constant {

  val iosPkgPtn = Pattern.compile("^\\d+$")
  val adrPkgPtn = Pattern.compile("^[0-9a-zA-Z\\.]+$")

  var lineSplit: Pattern = Pattern.compile("-")
  val wellSplit: Pattern = Pattern.compile("#")
  val colonSplit: Pattern = Pattern.compile(":")
  val verticalLine: Pattern = Pattern.compile("\\|")
  val dollarSplit: Pattern = Pattern.compile("\\$")
  val `match`: Pattern = Pattern.compile("^0*-0*-0*-0*-0*$")
  val regex: Pattern = Pattern.compile("""^\d+$""")
  val matchingSet: util.HashSet[String] = Sets.newHashSet("", "0", "1970", "GB", "null", "-")
  val format = new SimpleDateFormat("yyyy-MM-dd")
  val sdf = new SimpleDateFormat("yyyyMMdd")
  val didPtn = "^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$"
  val imeiPtn = "^([0-9]{15,17})$"

  val dm_device_tag_sql =
    s"""
       |SELECT device_id,device_type,tag_type,first_tag,second_tag
       |FROM dwh.dm_device_tag WHERE dt = '@dt' AND business = '@business' AND update_date = '@update_date'
    """.stripMargin

  val dm_install_list_sql: String =
    s"""
       |SELECT device_id,device_type,platform,install_list
       |FROM dwh.dm_install_list WHERE CONCAT(year,month,day) = '@date' AND business = '@business'
    """.stripMargin

  val dm_install_list_v2_sql: String =
    s"""
       |SELECT device_id,device_type,platform,CONCAT_WS(',',COLLECT_SET(CONCAT(package_name,'#','update_date'))) install_list
       |  FROM dwh.dm_install_list_v2 WHERE dt = '@date' AND business = '@business' GROUP BY device_id,device_type,platform
       |  HAVING MAX(update_date) = '@update_date'
    """.stripMargin

  val dm_install_daily_schema = StructType(Array(
    StructField("device_id", StringType),
    StructField("device_type", StringType),
    StructField("platform", StringType),
    StructField("package_name", StringType),
    StructField("update_date", StringType)))

  val dm_device_gender_sql =
    """
      |SELECT t.device_id, t.device_type, t.gender
      |FROM (
      |  SELECT device_id, device_type, gender,
      |     row_number() OVER(PARTITION BY device_id, device_type, gender ORDER BY ratio DESC)AS rk
      |  FROM dwh.dm_device_gender_v2
      |  WHERE year = '@year' AND month = '@month' AND day = '@day' AND update_date = '@update_date'
      |) t
      |WHERE t.rk = '1'
    """.stripMargin

  val dm_device_age_sql =
    s"""
       |select device_id,device_type,split(getAgeRatio(age),'#')[0] as age,split(getAgeRatio(age),'#')[1] as ratio
       |from dwh.dm_device_age_v2
       |where year = '@year' and month = '@month'
       |and day = '@day' and update_date = '@update_date'
    """.stripMargin

  val ageRatio =
    """
      |select t.device_id, t.device_type, t.age
      |from (
      |  select device_id, device_type, age,
      |     row_number() over(partition by device_id, device_type, age order by ratio desc)as rk
      |  from tmp_age
      |) t
      |where t.rk = '1'
    """.stripMargin

  val user_info_sql =
    s"""select t.device_id, t.device_type, t.platform,
       |  case when  upper(t.country) = 'GB'  then 'UK' else t.country end as country,
       |  t.age, t.gender, t.tags, t.install_list, t.first_req_day, t.last_req_day
       |from (
       |   select
       |     coalesce(a.device_id, b.device_id) as device_id,
       |     coalesce(a.device_type, b.device_type) as device_type,
       |     coalesce(a.platform, b.platform) as platform,
       |     coalesce(a.country, b.country) as country,
       |     coalesce(a.age, b.age, '') as age,
       |     coalesce(a.gender, b.gender, '') as gender,
       |     '' as tags,
       |     coalesce(a.install_list,b.install_list, '') as install_list,
       |     case when
       |               b.device_id is null
       |          then
       |               '@date'
       |          else
       |               b.first_req_day
       |          end as first_req_day,
       |     case when
       |               a.device_id is null
       |          then
       |               b.last_req_day
       |          else
       |               '@date'
       |          end as last_req_day
       |   from (
       |     select /*+ mapjoin(t)*/ t.device_id, t.device_type, t.platform, t.country, a.age, g.gender, p.install_list
       |         from (
       |            select t.device_id, t.device_type, t.platform, t.country,
       |              row_number() over(partition by t.device_id, t.device_type order by t.country desc ) as rk
       |            from t_daily t
       |            where t.device_id rlike '${didPtn}' or t.device_id rlike '${imeiPtn}'
       |         ) t
       |         left outer join t_age a on (upper(a.device_id) = upper(t.device_id) and a.device_type = t.device_type)
       |         left outer join t_gender g on (upper(g.device_id) = upper(t.device_id) and g.device_type = t.device_type)
       |         left outer join t_package p on (upper(p.device_id) = upper(t.device_id) and p.device_type = t.device_type)
       |     where t.rk = 1
       |   ) a
       |   full outer join t_total b
       |   on a.device_id = b.device_id and a.device_type = b.device_type
       |) t
        """.stripMargin

  val ods_dmp_user_info_sql =
    s"""
       |SELECT device_id, device_type, platform, country, '' age, '' gender , '' tags,
       | first_req_day, last_req_day
       | FROM dwh.ods_dmp_user_info WHERE dt = '@dt' AND business = '@business'
    """.stripMargin

  val alter_partition =
    """
      |USE dwh;
      |ALTER TABLE @table ADD IF NOT EXISTS PARTITION (@part) LOCATION '@location';
    """.stripMargin

  val old2new_sql: String =
    """
      |SELECT UPPER(CONCAT(tag_type, '-', first_tag, '-', second_tag)) tag_code, new_second_id FROM
      | dwh.dm_old2new_tag
    """.stripMargin
}