// Constant.scala 7.83 KB
package mobvista.dmp.datasource.toutiao

import java.io.ByteArrayOutputStream
import java.util
import java.util.Base64

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StringType, StructField, StructType}

import scala.collection.mutable.ArrayBuffer
import collection.JavaConverters._

/**
  * Partition-level helpers for the Toutiao datasource job: they key tag rows by
  * device/active id, expand the grouped tags into composite tag codes, and pack
  * grouped ids into Base64-encoded `DmpData` protobuf packets.
  *
  * @package: mobvista.dmp.datasource.toutiao
  * @author: wangjf
  * @date: 2018/12/11
  * @time: 7:42 PM
  * @email: jinfeng.wang@mobvista.com
  * @phone: 152-1062-7698
  */
object Constant {

  /**
    * Separator (U+0001) placed between device id and active id in the key built by
    * [[mapPart]]. Written as a unicode escape because octal escapes are deprecated.
    */
  private val KeySeparator = "\u0001"

  /** Code emitted for a tag dimension (category / style / rule) that has no values. */
  private val DefaultCode = "00"

  /** Maximum number of id items serialized into a single encoded packet. */
  private val MaxIdsPerPacket = 10000

  /** Three-column, all-string schema: `tag_code`, `tag_type`, `tag_id`. */
  def schema: StructType =
    StructType(Seq(
      StructField("tag_code", StringType),
      StructField("tag_type", StringType),
      StructField("tag_id", StringType)))

  /**
    * Maps every row to `(device_id + U+0001 + active_id, (tag_type, tag_id))`.
    *
    * Columns are looked up by name and stringified with `toString`, so a null column
    * value still fails with a `NullPointerException`. The result is produced lazily
    * instead of buffering the whole partition.
    *
    * @param iters rows exposing `device_id`, `tag_type`, `tag_id` and `active_id`
    * @return one keyed pair per input row, in input order
    */
  def mapPart(iters: Iterator[Row]): Iterator[(String, (String, String))] =
    iters.map { row =>
      // Explicit type argument: a bare `getAs(...)` would infer `Nothing`.
      def text(column: String): String = row.getAs[Any](column).toString

      val deviceId = text("device_id")
      val tagType = text("tag_type")
      val tagId = text("tag_id")
      val activeId = text("active_id")
      (deviceId + KeySeparator + activeId, (tagType, tagId))
    }

  /**
    * Expands the tags grouped under one `device_id U+0001 active_id` key into composite
    * codes of the form `category + style + rule + active_id`.
    *
    * Tag type `"01"` is collected as a category, `"02"` as a style and anything else as
    * a rule; duplicates within a dimension are dropped. A dimension without values
    * contributes `"00"`. When there are no tags at all nothing is emitted.
    *
    * @param ir the grouping key and its `(tag_type, tag_id)` pairs
    * @return `(device_id, code)` for every category/style/rule combination
    * @throws ArrayIndexOutOfBoundsException if the key contains no separator
    */
  def flatMapCore(ir: (String, Iterable[(String, String)])): ArrayBuffer[(String, String)] = {
    val expanded = ArrayBuffer.empty[(String, String)]
    expanded ++= expandTagCodes(ir._1, ir._2)
    expanded
  }

  /**
    * Partition-wise variant of [[flatMapCore]]: applies the same expansion to every
    * grouped entry and concatenates the results lazily.
    *
    * @param iters grouped entries keyed by `device_id U+0001 active_id`
    * @return `(device_id, code)` pairs for all entries, in input order
    */
  def mapPartCore(iters: Iterator[(String, Iterable[(String, String)])]): Iterator[(String, String)] =
    iters.flatMap(entry => expandTagCodes(entry._1, entry._2))

  /**
    * Packs grouped ids into `DmpData` messages of at most 10000 id items each and
    * returns every message as a Base64 string of its serialized bytes.
    *
    * Each id item is typed `IDFA_MD5`, carries the group key as its id and the group's
    * values as tags. The last packet may hold fewer items; empty input yields no packet.
    *
    * @param iters `(id, tags)` groups
    * @return one Base64-encoded packet per batch, produced lazily
    */
  def mapProtoPart(iters: Iterator[(String, Iterable[String])]): Iterator[String] =
    // The idList of a single packet must not exceed 10000 entries.
    iters.grouped(MaxIdsPerPacket).map { batch =>
      val packet = DmpDataProto.DmpData.newBuilder()
      batch.foreach { group =>
        val idItem = packet.addIdListBuilder()
        idItem.setDataType(DmpDataProto.IdItem.DataType.IDFA_MD5)
        idItem.setId(group._1)
        group._2.foreach(tag => idItem.addTags(tag))
      }
      val bytes = new ByteArrayOutputStream()
      packet.build().writeTo(bytes)
      Base64.getEncoder.encodeToString(bytes.toByteArray)
    }

  /** Shared expansion behind [[flatMapCore]] and [[mapPartCore]]. */
  private def expandTagCodes(key: String, tags: Iterable[(String, String)]): Seq[(String, String)] = {
    // Limit -1 keeps trailing empty fields, so an empty active id still yields two parts.
    val keyParts = key.split(KeySeparator, -1)
    val deviceId = keyParts(0)
    val activeId = keyParts(1)

    // java.util.HashSet is kept so the emission order matches the previous implementation.
    val categories = new util.HashSet[String]()
    val styles = new util.HashSet[String]()
    val rules = new util.HashSet[String]()
    tags.foreach { tag =>
      val bucket = tag._1 match {
        case "01" => categories
        case "02" => styles
        case _ => rules
      }
      bucket.add(tag._2)
    }

    if (categories.isEmpty && styles.isEmpty && rules.isEmpty) {
      // No tags at all: emit nothing rather than an all-default code.
      Vector.empty
    } else {
      def codesOrDefault(codes: util.Set[String]): Vector[String] =
        if (codes.isEmpty) Vector(DefaultCode) else codes.asScala.toVector

      val styleCodes = codesOrDefault(styles)
      val ruleCodes = codesOrDefault(rules)
      // Nesting order: category outermost, then style, then rule.
      for {
        category <- codesOrDefault(categories)
        style <- styleCodes
        rule <- ruleCodes
      } yield (deviceId, category + style + rule + activeId)
    }
  }
}