Constant.scala 5.96 KB
Newer Older
wang-jinfeng committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
package mobvista.dmp.datasource.device

import java.text.SimpleDateFormat
import java.util
import java.util.regex.Pattern

import com.google.common.collect.Sets
import org.apache.spark.sql.types.{StringType, StructField, StructType}

/**
  * @package: mobvista.dmp.datasource.device
  * @author: wangjf
  * @create: 2018-08-28 16:50
  **/
object Constant {

  val iosPkgPtn = Pattern.compile("^\\d+$")
  val adrPkgPtn = Pattern.compile("^[0-9a-zA-Z\\.]+$")

  var lineSplit: Pattern = Pattern.compile("-")
  val wellSplit: Pattern = Pattern.compile("#")
  val colonSplit: Pattern = Pattern.compile(":")
  val verticalLine: Pattern = Pattern.compile("\\|")
  val dollarSplit: Pattern = Pattern.compile("\\$")
  val `match`: Pattern = Pattern.compile("^0*-0*-0*-0*-0*$")
  val regex: Pattern = Pattern.compile("""^\d+$""")
  val matchingSet: util.HashSet[String] = Sets.newHashSet("", "0", "1970", "GB", "null", "-")
  val format = new SimpleDateFormat("yyyy-MM-dd")
  val sdf = new SimpleDateFormat("yyyyMMdd")
  val didPtn = "^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$"
  val imeiPtn = "^([0-9]{15,17})$"

  val dm_device_tag_sql =
    s"""
       |SELECT device_id,device_type,tag_type,first_tag,second_tag
       |FROM dwh.dm_device_tag WHERE dt = '@dt' AND business = '@business' AND update_date = '@update_date'
    """.stripMargin

  val dm_install_list_sql: String =
    s"""
       |SELECT device_id,device_type,platform,install_list
       |FROM dwh.dm_install_list WHERE CONCAT(year,month,day) = '@date' AND business = '@business'
    """.stripMargin

  val dm_install_list_v2_sql: String =
    s"""
       |SELECT device_id,device_type,platform,CONCAT_WS(',',COLLECT_SET(CONCAT(package_name,'#','update_date'))) install_list
       |  FROM dwh.dm_install_list_v2 WHERE dt = '@date' AND business = '@business' GROUP BY device_id,device_type,platform
       |  HAVING MAX(update_date) = '@update_date'
    """.stripMargin

  val dm_install_daily_schema = StructType(Array(
    StructField("device_id", StringType),
    StructField("device_type", StringType),
    StructField("platform", StringType),
    StructField("package_name", StringType),
    StructField("update_date", StringType)))

  val dm_device_gender_sql =
    """
      |SELECT t.device_id, t.device_type, t.gender
      |FROM (
      |  SELECT device_id, device_type, gender,
      |     row_number() OVER(PARTITION BY device_id, device_type, gender ORDER BY ratio DESC)AS rk
      |  FROM dwh.dm_device_gender_v2
      |  WHERE year = '@year' AND month = '@month' AND day = '@day' AND update_date = '@update_date'
      |) t
      |WHERE t.rk = '1'
    """.stripMargin

  val dm_device_age_sql =
    s"""
       |select device_id,device_type,split(getAgeRatio(age),'#')[0] as age,split(getAgeRatio(age),'#')[1] as ratio
       |from dwh.dm_device_age_v2
       |where year = '@year' and month = '@month'
       |and day = '@day' and update_date = '@update_date'
    """.stripMargin

  val ageRatio =
    """
      |select t.device_id, t.device_type, t.age
      |from (
      |  select device_id, device_type, age,
      |     row_number() over(partition by device_id, device_type, age order by ratio desc)as rk
      |  from tmp_age
      |) t
      |where t.rk = '1'
    """.stripMargin

  val user_info_sql =
    s"""select t.device_id, t.device_type, t.platform,
       |  case when  upper(t.country) = 'GB'  then 'UK' else t.country end as country,
       |  t.age, t.gender, t.tags, t.install_list, t.first_req_day, t.last_req_day
       |from (
       |   select
       |     coalesce(a.device_id, b.device_id) as device_id,
       |     coalesce(a.device_type, b.device_type) as device_type,
       |     coalesce(a.platform, b.platform) as platform,
       |     coalesce(a.country, b.country) as country,
       |     coalesce(a.age, b.age, '') as age,
       |     coalesce(a.gender, b.gender, '') as gender,
       |     '' as tags,
       |     coalesce(a.install_list,b.install_list, '') as install_list,
       |     case when
       |               b.device_id is null
       |          then
       |               '@date'
       |          else
       |               b.first_req_day
       |          end as first_req_day,
       |     case when
       |               a.device_id is null
       |          then
       |               b.last_req_day
       |          else
       |               '@date'
       |          end as last_req_day
       |   from (
       |     select /*+ mapjoin(t)*/ t.device_id, t.device_type, t.platform, t.country, a.age, g.gender, p.install_list
       |         from (
       |            select t.device_id, t.device_type, t.platform, t.country,
       |              row_number() over(partition by t.device_id, t.device_type order by t.country desc ) as rk
       |            from t_daily t
       |            where t.device_id rlike '${didPtn}' or t.device_id rlike '${imeiPtn}'
       |         ) t
       |         left outer join t_age a on (upper(a.device_id) = upper(t.device_id) and a.device_type = t.device_type)
       |         left outer join t_gender g on (upper(g.device_id) = upper(t.device_id) and g.device_type = t.device_type)
       |         left outer join t_package p on (upper(p.device_id) = upper(t.device_id) and p.device_type = t.device_type)
       |     where t.rk = 1
       |   ) a
       |   full outer join t_total b
       |   on a.device_id = b.device_id and a.device_type = b.device_type
       |) t
        """.stripMargin

  val ods_dmp_user_info_sql =
    s"""
       |SELECT device_id, device_type, platform, country, '' age, '' gender , '' tags,
       | first_req_day, last_req_day
       | FROM dwh.ods_dmp_user_info WHERE dt = '@dt' AND business = '@business'
    """.stripMargin

  val alter_partition =
    """
      |USE dwh;
      |ALTER TABLE @table ADD IF NOT EXISTS PARTITION (@part) LOCATION '@location';
    """.stripMargin

  val old2new_sql: String =
    """
      |SELECT UPPER(CONCAT(tag_type, '-', first_tag, '-', second_tag)) tag_code, new_second_id FROM
      | dwh.dm_old2new_tag
    """.stripMargin
}