Commit 59a91aaf by WangJinfeng

init id-mapping v1.0

parent 4fff73b8
package mobvista.dmp.datasource.id_mapping package mobvista.dmp.datasource.id_mapping
import com.alibaba.fastjson.JSONObject
import mobvista.dmp.common.MobvistaConstant._ import mobvista.dmp.common.MobvistaConstant._
import org.apache.commons.lang3.StringUtils import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.Row import org.apache.spark.sql.Row
import org.apache.spark.sql.types._ import org.apache.spark.sql.types._
import java.net.URLDecoder import java.net.URLDecoder
import scala.collection.mutable.ArrayBuffer
/** /**
* @package: mobvista.dmp.datasource.id_mapping * @package: mobvista.dmp.datasource.id_mapping
...@@ -243,6 +245,18 @@ object Constant { ...@@ -243,6 +245,18 @@ object Constant {
| GROUP BY imei, android_id, pkg_name, oaid, gaid, sysid, bkupid, xwho, user_id, country, ip, ua, brand, model, os_version, osv_upt, upt | GROUP BY imei, android_id, pkg_name, oaid, gaid, sysid, bkupid, xwho, user_id, country, ip, ua, brand, model, os_version, osv_upt, upt
|""".stripMargin |""".stripMargin
/**
 * SQL template that reads the iOS device-id columns (idfa, idfv, sysid, bkupid, xwho, user_id),
 * the accompanying device attributes and `cnt` from `dws.dws_device_id_ios_frequency`
 * for rows where `dt = '@date'`.
 *
 * `@date` and `@filter_country` are literal placeholder tokens in the string;
 * presumably the caller substitutes them before running the query - TODO confirm at the call site.
 */
val ios_id_mapping_sql_v2: String =
"""
|SELECT idfa, idfv, pkg_name, sysid, bkupid, xwho, user_id, country, ip, ua, brand, model, os_version, osv_upt, upt, cnt
| FROM dws.dws_device_id_ios_frequency WHERE dt = '@date' @filter_country
|""".stripMargin
/**
 * SQL template that reads the Android device-id columns (imei, android_id, oaid, gaid, sysid,
 * bkupid, xwho, user_id), the accompanying device attributes and `cnt` from
 * `dws.dws_device_id_android_frequency` for rows where `dt = '@date'`.
 *
 * `@date` and `@filter_country` are literal placeholder tokens in the string;
 * presumably the caller substitutes them before running the query - TODO confirm at the call site.
 */
val android_id_mapping_sql_v2: String =
"""
|SELECT imei, android_id, pkg_name, oaid, gaid, sysid, bkupid, xwho, user_id, country, ip, ua, brand, model, os_version, osv_upt, upt, cnt
| FROM dws.dws_device_id_android_frequency WHERE dt = '@date' @filter_country
|""".stripMargin
val old_id_mapping_sql: String = val old_id_mapping_sql: String =
""" """
| |
...@@ -282,42 +296,42 @@ object Constant { ...@@ -282,42 +296,42 @@ object Constant {
def process(idfa: String, idfv: String, pkg_name: String, imei: String, androidId: String, oaid: String, gaid: String, sysId: String, def process(idfa: String, idfv: String, pkg_name: String, imei: String, androidId: String, oaid: String, gaid: String, sysId: String,
bkupId: String, country: String, ip: String, ua: String, brand: String, model: String, os_version: String, osv_upt: String, bkupId: String, country: String, ip: String, ua: String, brand: String, model: String, os_version: String, osv_upt: String,
upt: String, network_type: String, platform: String, cnt: Long): (String, Row) = { upt: String, network_type: String, platform: String, cnt: Long): (String, Row) = {
val f_idfa = if (StringUtils.isNotBlank(idfa) && idfa.matches(didPtn) && !idfa.matches(allZero)) { val f_idfa = if (StringUtils.isNotBlank(idfa) && (idfa.matches(didPtn) && !idfa.matches(allZero) || idfa.matches(md5Ptn))) {
idfa idfa
} else { } else {
"" ""
} }
val f_idfv = if (StringUtils.isNotBlank(idfv) && idfv.matches(didPtn) && !idfa.matches(allZero)) { val f_idfv = if (StringUtils.isNotBlank(idfv) && (idfv.matches(didPtn) && !idfa.matches(allZero) || idfv.matches(md5Ptn))) {
idfv idfv
} else { } else {
"" ""
} }
val f_imei = if (StringUtils.isNotBlank(imei) && imei.matches(imeiPtn) && !imei.matches(imeiPtnAll)) { val f_imei = if (StringUtils.isNotBlank(imei) && (imei.matches(imeiPtn) && !imei.matches(imeiPtnAll) || imei.matches(md5Ptn))) {
imei imei
} else { } else {
"" ""
} }
val f_androidId = if (StringUtils.isNotBlank(androidId) && androidId.matches(andriodIdPtn) && !androidId.matches(andriodIdAll)) { val f_androidId = if (StringUtils.isNotBlank(androidId) && (androidId.matches(andriodIdPtn) && !androidId.matches(andriodIdAll) || androidId.matches(md5Ptn))) {
androidId androidId
} else { } else {
"" ""
} }
val f_oaid = if (StringUtils.isNotBlank(oaid) && oaid.length >= 16 && oaid.length <= 64) { val f_oaid = if (StringUtils.isNotBlank(oaid) && (oaid.length >= 16 && oaid.length <= 64 && !oaid.matches(allZero) || oaid.matches(md5Ptn))) {
oaid oaid
} else { } else {
"" ""
} }
val f_gaid = if (StringUtils.isNotBlank(gaid) && gaid.matches(didPtn) && !gaid.matches(allZero)) { val f_gaid = if (StringUtils.isNotBlank(gaid) && (gaid.matches(didPtn) && !gaid.matches(allZero) || gaid.matches(md5Ptn))) {
gaid gaid
} else { } else {
"" ""
} }
val f_sysId = if (StringUtils.isNotBlank(sysId) && sysId.matches(didPtn) && !sysId.matches(allZero)) { val f_sysId = if (StringUtils.isNotBlank(sysId) && (sysId.matches(didPtn) && !sysId.matches(allZero) || sysId.matches(md5Ptn))) {
sysId sysId
} else { } else {
"" ""
} }
val f_bkupId = if (StringUtils.isNotBlank(bkupId) && bkupId.matches(didPtn) && !bkupId.matches(allZero)) { val f_bkupId = if (StringUtils.isNotBlank(bkupId) && (bkupId.matches(didPtn) && !bkupId.matches(allZero) || bkupId.matches(md5Ptn))) {
bkupId bkupId
} else { } else {
"" ""
...@@ -378,7 +392,7 @@ object Constant { ...@@ -378,7 +392,7 @@ object Constant {
|| f_ua.toLowerCase.contains("iphone") || f_ua.toLowerCase.contains("ipad")) { || f_ua.toLowerCase.contains("iphone") || f_ua.toLowerCase.contains("ipad")) {
"ios" "ios"
} else if (f_platform.contains("android") || f_osv.toLowerCase.contains("android") || f_ua.toLowerCase.contains("android") } else if (f_platform.contains("android") || f_osv.toLowerCase.contains("android") || f_ua.toLowerCase.contains("android")
|| f_imei.length >= 14 || (f_oaid.length >= 16 && f_oaid.length <= 64) || f_androidId.length >= 15 || f_gaid.length == 36) { || f_imei.length >= 14 || (f_oaid.length >= 16 && f_oaid.length <= 64) || f_androidId.length >= 15 || f_gaid.length == 36 || f_gaid == 32) {
"android" "android"
} else { } else {
"other" "other"
...@@ -395,5 +409,49 @@ object Constant { ...@@ -395,5 +409,49 @@ object Constant {
case class Result(device_id: String, device_type: String, one_id: String) extends Serializable case class Result(device_id: String, device_type: String, one_id: String) extends Serializable
case class OneIDScore(one_id: String, one_type: String, one_score: Double) extends Serializable case class OneIDScore(one_id: String, one_type: String, one_score: Double, one_version: String) extends Serializable
/**
 * Iterator adapter that turns each grouped record from `iter` into the buffer of rows emitted for it.
 *
 * Each upstream element is a key `(id, idType)` paired with a set of `(id, idType, count)` members.
 * For one element, `next` returns a buffer containing:
 *   1. one row keyed by the group key, whose value is the JSON string of all retained members;
 *   2. one row per member, keyed by `(member id, member type)`.
 *
 * A member is retained in the group JSON when its id equals the group id or its type is in
 * `mainIDSet`. Each retained entry has the fields `one_type`, `one_date` and `one_cnt`.
 *
 * @param active_date value written to the `one_date` field of every JSON entry
 * @param iter        upstream iterator of grouped records
 * @param idArray     id-type names; kept for interface compatibility, currently unused because
 *                    the position-based filter that read it is disabled (it was commented out)
 * @param mainIDSet   id types whose members are always retained in the group JSON
 */
class CustomInterator(active_date: String, iter: Iterator[((String, String), Set[(String, String, Long)])],
                      idArray: Array[String], mainIDSet: Set[String]) extends Iterator[ArrayBuffer[((String, String), String)]] {

  /** True while the upstream iterator still has grouped records. */
  def hasNext: Boolean = iter.hasNext

  /**
   * Consumes one grouped record and returns its rows (group row first, then one row per member).
   *
   * @return buffer of `((id, idType), jsonString)` rows
   */
  def next: ArrayBuffer[((String, String), String)] = {
    val ((groupId, groupType), members) = iter.next()
    val array = new ArrayBuffer[((String, String), String)]()

    // Group JSON: member id -> {one_type, one_date, one_cnt} for every retained member.
    val oneID = new JSONObject()
    members.foreach { case (id, idType, cnt) =>
      if (groupId.equals(id) || mainIDSet.contains(idType)) {
        val entry = new JSONObject()
        entry.put("one_type", idType)
        entry.put("one_date", active_date)
        entry.put("one_cnt", cnt)
        oneID.put(id, entry)
      }
    }
    array += (((groupId, groupType), oneID.toJSONString))

    // Every member is emitted; the former `idArray` position check around this loop is disabled.
    members.foreach { case (id, idType, _) =>
      // A retained member carries only its own entry; any other member carries the whole group JSON.
      val payload =
        if (oneID.containsKey(id)) {
          val single = new JSONObject()
          single.put(id, oneID.getJSONObject(id))
          single
        } else {
          oneID
        }
      array += (((id, idType), payload.toJSONString))
    }
    array
  }
}
} }
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment