package mobvista.dmp.datasource.device import java.text.SimpleDateFormat import java.util import java.util.regex.Pattern import com.google.common.collect.Sets import org.apache.spark.sql.types.{StringType, StructField, StructType} /** * @package: mobvista.dmp.datasource.device * @author: wangjf * @create: 2018-08-28 16:50 **/ object Constant { val iosPkgPtn = Pattern.compile("^\\d+$") val adrPkgPtn = Pattern.compile("^[0-9a-zA-Z\\.]+$") var lineSplit: Pattern = Pattern.compile("-") val wellSplit: Pattern = Pattern.compile("#") val colonSplit: Pattern = Pattern.compile(":") val verticalLine: Pattern = Pattern.compile("\\|") val dollarSplit: Pattern = Pattern.compile("\\$") val `match`: Pattern = Pattern.compile("^0*-0*-0*-0*-0*$") val regex: Pattern = Pattern.compile("""^\d+$""") val matchingSet: util.HashSet[String] = Sets.newHashSet("", "0", "1970", "GB", "null", "-") val format = new SimpleDateFormat("yyyy-MM-dd") val sdf = new SimpleDateFormat("yyyyMMdd") val didPtn = "^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$" val imeiPtn = "^([0-9]{15,17})$" val dm_device_tag_sql = s""" |SELECT device_id,device_type,tag_type,first_tag,second_tag |FROM dwh.dm_device_tag WHERE dt = '@dt' AND business = '@business' AND update_date = '@update_date' """.stripMargin val dm_install_list_sql: String = s""" |SELECT device_id,device_type,platform,install_list |FROM dwh.dm_install_list WHERE CONCAT(year,month,day) = '@date' AND business = '@business' """.stripMargin val dm_install_list_v2_sql: String = s""" |SELECT device_id,device_type,platform,CONCAT_WS(',',COLLECT_SET(CONCAT(package_name,'#','update_date'))) install_list | FROM dwh.dm_install_list_v2 WHERE dt = '@date' AND business = '@business' GROUP BY device_id,device_type,platform | HAVING MAX(update_date) = '@update_date' """.stripMargin val dm_install_daily_schema = StructType(Array( StructField("device_id", StringType), StructField("device_type", StringType), StructField("platform", StringType), StructField("package_name", StringType), StructField("update_date", StringType))) val dm_device_gender_sql = """ |SELECT t.device_id, t.device_type, t.gender |FROM ( | SELECT device_id, device_type, gender, | row_number() OVER(PARTITION BY device_id, device_type, gender ORDER BY ratio DESC)AS rk | FROM dwh.dm_device_gender_v2 | WHERE year = '@year' AND month = '@month' AND day = '@day' AND update_date = '@update_date' |) t |WHERE t.rk = '1' """.stripMargin val dm_device_age_sql = s""" |select device_id,device_type,split(getAgeRatio(age),'#')[0] as age,split(getAgeRatio(age),'#')[1] as ratio |from dwh.dm_device_age_v2 |where year = '@year' and month = '@month' |and day = '@day' and update_date = '@update_date' """.stripMargin val ageRatio = """ |select t.device_id, t.device_type, t.age |from ( | select device_id, device_type, age, | row_number() over(partition by device_id, device_type, age order by ratio desc)as rk | from tmp_age |) t |where t.rk = '1' """.stripMargin val user_info_sql = s"""select t.device_id, t.device_type, t.platform, | case when upper(t.country) = 'GB' then 'UK' else t.country end as country, | t.age, t.gender, t.tags, t.install_list, t.first_req_day, t.last_req_day |from ( | select | coalesce(a.device_id, b.device_id) as device_id, | coalesce(a.device_type, b.device_type) as device_type, | coalesce(a.platform, b.platform) as platform, | coalesce(a.country, b.country) as country, | coalesce(a.age, b.age, '') as age, | coalesce(a.gender, b.gender, '') as gender, | '' as tags, | coalesce(a.install_list,b.install_list, '') as install_list, | case when | b.device_id is null | then | '@date' | else | b.first_req_day | end as first_req_day, | case when | a.device_id is null | then | b.last_req_day | else | '@date' | end as last_req_day | from ( | select /*+ mapjoin(t)*/ t.device_id, t.device_type, t.platform, t.country, a.age, g.gender, p.install_list | from ( | select t.device_id, t.device_type, t.platform, t.country, | row_number() over(partition by t.device_id, t.device_type order by t.country desc ) as rk | from t_daily t | where t.device_id rlike '${didPtn}' or t.device_id rlike '${imeiPtn}' | ) t | left outer join t_age a on (upper(a.device_id) = upper(t.device_id) and a.device_type = t.device_type) | left outer join t_gender g on (upper(g.device_id) = upper(t.device_id) and g.device_type = t.device_type) | left outer join t_package p on (upper(p.device_id) = upper(t.device_id) and p.device_type = t.device_type) | where t.rk = 1 | ) a | full outer join t_total b | on a.device_id = b.device_id and a.device_type = b.device_type |) t """.stripMargin val ods_dmp_user_info_sql = s""" |SELECT device_id, device_type, platform, country, '' age, '' gender , '' tags, | first_req_day, last_req_day | FROM dwh.ods_dmp_user_info WHERE dt = '@dt' AND business = '@business' """.stripMargin val alter_partition = """ |USE dwh; |ALTER TABLE @table ADD IF NOT EXISTS PARTITION (@part) LOCATION '@location'; """.stripMargin val old2new_sql: String = """ |SELECT UPPER(CONCAT(tag_type, '-', first_tag, '-', second_tag)) tag_code, new_second_id FROM | dwh.dm_old2new_tag """.stripMargin }