Commit 59a91aaf by WangJinfeng

init id-mapping v1.0

parent 4fff73b8
No related merge requests found
package mobvista.dmp.datasource.id_mapping
import com.alibaba.fastjson.JSONObject
import mobvista.dmp.common.MobvistaConstant._
import org.apache.commons.lang3.StringUtils
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import java.net.URLDecoder
import scala.collection.mutable.ArrayBuffer
/**
* @package: mobvista.dmp.datasource.id_mapping
......@@ -243,6 +245,18 @@ object Constant {
| GROUP BY imei, android_id, pkg_name, oaid, gaid, sysid, bkupid, xwho, user_id, country, ip, ua, brand, model, os_version, osv_upt, upt
|""".stripMargin
/**
 * SQL template that reads the iOS id columns (idfa, idfv, pkg_name, sysid, bkupid, xwho, user_id),
 * the device attributes (country, ip, ua, brand, model, os_version, osv_upt, upt) and `cnt`
 * from `dws.dws_device_id_ios_frequency`, filtered by `dt = '@date'`.
 *
 * `@date` and `@filter_country` are literal placeholder tokens in the text.
 * NOTE(review): presumably callers substitute both tokens before running the query — confirm at the call sites.
 */
val ios_id_mapping_sql_v2: String =
"""
|SELECT idfa, idfv, pkg_name, sysid, bkupid, xwho, user_id, country, ip, ua, brand, model, os_version, osv_upt, upt, cnt
| FROM dws.dws_device_id_ios_frequency WHERE dt = '@date' @filter_country
|""".stripMargin
/**
 * SQL template that reads the Android id columns (imei, android_id, pkg_name, oaid, gaid, sysid, bkupid,
 * xwho, user_id), the device attributes (country, ip, ua, brand, model, os_version, osv_upt, upt) and `cnt`
 * from `dws.dws_device_id_android_frequency`, filtered by `dt = '@date'`.
 *
 * `@date` and `@filter_country` are literal placeholder tokens in the text.
 * NOTE(review): presumably callers substitute both tokens before running the query — confirm at the call sites.
 */
val android_id_mapping_sql_v2: String =
"""
|SELECT imei, android_id, pkg_name, oaid, gaid, sysid, bkupid, xwho, user_id, country, ip, ua, brand, model, os_version, osv_upt, upt, cnt
| FROM dws.dws_device_id_android_frequency WHERE dt = '@date' @filter_country
|""".stripMargin
val old_id_mapping_sql: String =
"""
|
......@@ -282,42 +296,42 @@ object Constant {
def process(idfa: String, idfv: String, pkg_name: String, imei: String, androidId: String, oaid: String, gaid: String, sysId: String,
bkupId: String, country: String, ip: String, ua: String, brand: String, model: String, os_version: String, osv_upt: String,
upt: String, network_type: String, platform: String, cnt: Long): (String, Row) = {
val f_idfa = if (StringUtils.isNotBlank(idfa) && idfa.matches(didPtn) && !idfa.matches(allZero)) {
val f_idfa = if (StringUtils.isNotBlank(idfa) && (idfa.matches(didPtn) && !idfa.matches(allZero) || idfa.matches(md5Ptn))) {
idfa
} else {
""
}
val f_idfv = if (StringUtils.isNotBlank(idfv) && idfv.matches(didPtn) && !idfa.matches(allZero)) {
// Keep idfv only when it is non-blank and either matches `didPtn` without matching `allZero`,
// or matches `md5Ptn`; otherwise normalise it to the empty string.
// The all-zero test must look at idfv itself: testing idfa here let an all-zero idfv through,
// dropped a valid idfv whenever idfa was all zeros, and threw a NullPointerException on a null idfa.
val f_idfv = if (StringUtils.isNotBlank(idfv) && (idfv.matches(didPtn) && !idfv.matches(allZero) || idfv.matches(md5Ptn))) {
  idfv
} else {
  ""
}
val f_imei = if (StringUtils.isNotBlank(imei) && imei.matches(imeiPtn) && !imei.matches(imeiPtnAll)) {
val f_imei = if (StringUtils.isNotBlank(imei) && (imei.matches(imeiPtn) && !imei.matches(imeiPtnAll) || imei.matches(md5Ptn))) {
imei
} else {
""
}
val f_androidId = if (StringUtils.isNotBlank(androidId) && androidId.matches(andriodIdPtn) && !androidId.matches(andriodIdAll)) {
val f_androidId = if (StringUtils.isNotBlank(androidId) && (androidId.matches(andriodIdPtn) && !androidId.matches(andriodIdAll) || androidId.matches(md5Ptn))) {
androidId
} else {
""
}
val f_oaid = if (StringUtils.isNotBlank(oaid) && oaid.length >= 16 && oaid.length <= 64) {
val f_oaid = if (StringUtils.isNotBlank(oaid) && (oaid.length >= 16 && oaid.length <= 64 && !oaid.matches(allZero) || oaid.matches(md5Ptn))) {
oaid
} else {
""
}
val f_gaid = if (StringUtils.isNotBlank(gaid) && gaid.matches(didPtn) && !gaid.matches(allZero)) {
val f_gaid = if (StringUtils.isNotBlank(gaid) && (gaid.matches(didPtn) && !gaid.matches(allZero) || gaid.matches(md5Ptn))) {
gaid
} else {
""
}
val f_sysId = if (StringUtils.isNotBlank(sysId) && sysId.matches(didPtn) && !sysId.matches(allZero)) {
val f_sysId = if (StringUtils.isNotBlank(sysId) && (sysId.matches(didPtn) && !sysId.matches(allZero) || sysId.matches(md5Ptn))) {
sysId
} else {
""
}
val f_bkupId = if (StringUtils.isNotBlank(bkupId) && bkupId.matches(didPtn) && !bkupId.matches(allZero)) {
val f_bkupId = if (StringUtils.isNotBlank(bkupId) && (bkupId.matches(didPtn) && !bkupId.matches(allZero) || bkupId.matches(md5Ptn))) {
bkupId
} else {
""
......@@ -378,7 +392,7 @@ object Constant {
|| f_ua.toLowerCase.contains("iphone") || f_ua.toLowerCase.contains("ipad")) {
"ios"
} else if (f_platform.contains("android") || f_osv.toLowerCase.contains("android") || f_ua.toLowerCase.contains("android")
|| f_imei.length >= 14 || (f_oaid.length >= 16 && f_oaid.length <= 64) || f_androidId.length >= 15 || f_gaid.length == 36) {
|| f_imei.length >= 14 || (f_oaid.length >= 16 && f_oaid.length <= 64) || f_androidId.length >= 15 || f_gaid.length == 36 || f_gaid == 32) {
"android"
} else {
"other"
......@@ -395,5 +409,49 @@ object Constant {
/**
 * Record with three string fields: `device_id`, `device_type` and `one_id`.
 * NOTE(review): field meanings are inferred from their names only — confirm against the code that builds these rows.
 */
case class Result(device_id: String, device_type: String, one_id: String) extends Serializable
case class OneIDScore(one_id: String, one_type: String, one_score: Double) extends Serializable
/**
 * Record carrying `one_id`, `one_type`, a numeric `one_score` and a `one_version` string.
 * NOTE(review): field meanings are inferred from their names only — confirm against the code that builds these rows.
 */
case class OneIDScore(one_id: String, one_type: String, one_score: Double, one_version: String) extends Serializable
/**
 * Iterator adapter that turns every grouped record of `iter` into a buffer of
 * `((id, idType), json)` rows.
 *
 * Each upstream element is `((oneId, oneIdType), related)`, where `related` is a set of
 * `(id, idType, cnt)` triples. For one upstream element the produced buffer contains:
 *  - first, a row keyed by `(oneId, oneIdType)` whose value is the full JSON document, i.e. an object
 *    mapping every selected related id to `{"one_type", "one_date", "one_cnt"}`; a related id is
 *    selected when it equals `oneId` or when its type is contained in `mainIDSet`;
 *  - then one row per related triple, keyed by `(id, idType)`: if that id was selected, the value is a
 *    single-entry JSON object holding only its own entry, otherwise it is the full JSON document.
 *
 * @param active_date value written to the `one_date` field of every JSON entry
 * @param iter        upstream iterator of grouped records
 * @param idArray     currently unused; it only fed a type-priority index whose sole consumer was
 *                    commented out. Kept so existing constructor calls keep compiling.
 * @param mainIDSet   id types whose related ids are always included in the full JSON document
 */
class CustomInterator(active_date: String, iter: Iterator[((String, String), Set[(String, String, Long)])],
                      idArray: Array[String], mainIDSet: Set[String]) extends Iterator[ArrayBuffer[((String, String), String)]] {

  override def hasNext: Boolean = iter.hasNext

  override def next(): ArrayBuffer[((String, String), String)] = {
    val ((tmpOneId, tmpOneIdType), related) = iter.next()

    // Build the full document: one entry per related id that is the one-id itself or has a main id type.
    val oneID = new JSONObject()
    related.foreach { case (id, idType, cnt) =>
      if (tmpOneId.equals(id) || mainIDSet.contains(idType)) {
        val json = new JSONObject()
        json.put("one_type", idType)
        json.put("one_date", active_date)
        json.put("one_cnt", cnt)
        oneID.put(id, json)
      }
    }

    // oneID is not modified past this point, so serialise it once and reuse the string.
    val fullJson = oneID.toJSONString

    val array = new ArrayBuffer[((String, String), String)]()
    array += (((tmpOneId, tmpOneIdType), fullJson))
    related.foreach { case (id, idType, _) =>
      val payload = if (oneID.containsKey(id)) {
        // A selected id only carries its own entry.
        val single = new JSONObject()
        single.put(id, oneID.getJSONObject(id))
        single.toJSONString
      } else {
        // An id that was not selected carries the whole document.
        fullJson
      }
      array += (((id, idType), payload))
    }
    array
  }
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment