// MdsAdnRequestDailyMR.java 7.79 KB
package mobvista.dmp.datasource.adn.mapreduce;

import mobvista.dmp.common.CommonMapReduce;
import mobvista.dmp.common.CommonMapper;
import mobvista.dmp.format.RCFileOutputFormat;
import mobvista.dmp.util.BytesRefUtil;
import mobvista.dmp.util.MD5Util;
import mobvista.dmp.util.MRUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.regex.Pattern;

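/**
 * author: fengliang
 * date  : 2017-08-22
 * desc  : MapReduce job that reads delimited ADN request log lines, emits one record per
 *         device identifier found on each line, and writes the joined key/value rows
 *         through RCFileOutputFormat with gzip compression enabled.
 */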
public class MdsAdnRequestDailyMR extends CommonMapReduce {
    /**
     * Creates the job description; all three arguments are handed unchanged to the
     * {@link CommonMapReduce} constructor.
     *
     * @param name         job name passed to the superclass
     * @param mapperClass  mapper implementation passed to the superclass
     * @param reducerClass reducer implementation passed to the superclass
     */
    public MdsAdnRequestDailyMR(
            String name,
            Class<? extends Mapper> mapperClass,
            Class<? extends Reducer> reducerClass) {
        super(name, mapperClass, reducerClass);
    }

    /**
     * Command-line entry point: builds this job with its mapper and reducer and hands it,
     * together with the raw arguments, to {@code start}.
     *
     * @param args command-line arguments; {@code args[0]} is used as the input path and
     *             {@code args[1]} as the output path by the path setters of this class
     * @throws Exception whatever {@code start} propagates
     */
    public static void main(String[] args) throws Exception {
        MdsAdnRequestDailyMR dailyJob = new MdsAdnRequestDailyMR(
                "mds adn request daily job",
                MdsAdnRequestMapper.class,
                MdsAdnRequestReducer.class);
        start(dailyJob, args);
    }

    /**
     * Mapper that turns one delimited ADN request log line into one output record per
     * usable device identifier found on that line.
     *
     * <p>Every emitted record shares the same value
     * ({@code requestTime, appId, ip, countryCode, deviceModel, deviceBrand}); the key is the
     * identifier joined with its type and the platform. Rows with fewer than
     * {@link #MIN_FIELD_COUNT} fields are skipped, and rows whose time field is shorter than
     * six characters are counted under the {@code DMP / adn time length error} counter and
     * skipped.
     *
     * <p>{@code outKey} and {@code outValue} are not declared here; they are presumably
     * reusable writables inherited from {@link CommonMapper} — confirm against that class.
     */
    public static class MdsAdnRequestMapper extends CommonMapper {
        /** Rows with fewer fields than this are ignored. */
        private static final int MIN_FIELD_COUNT = 20;

        /**
         * Column index of the oaid field. It lies beyond {@link #MIN_FIELD_COUNT}, so it
         * must be treated as optional.
         */
        private static final int OAID_INDEX = 21;

        private static final String ANDROID = "android";

        // Compiled once: map() runs per input record and String.matches() would recompile each time.
        private static final Pattern IMEI_PATTERN = Pattern.compile(CommonMapReduce.imeiPtn);
        private static final Pattern ANDROID_ID_PATTERN = Pattern.compile(CommonMapReduce.andriodIdPtn);
        private static final Pattern ALL_ZERO_DIGITS = Pattern.compile("^0+$");

        /** Scratch buffer reused across records to build the formatted request time. */
        private final StringBuilder builder = new StringBuilder();

        /**
         * Parses one log line and writes a record for each identifier that passes validation.
         *
         * @param key     input key supplied by the framework; not used
         * @param value   one raw log line, split with {@code MRUtils.SPLITTER}
         * @param context Hadoop context used for output records and counters
         * @throws IOException          if writing an output record fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] array = MRUtils.SPLITTER.split(value.toString(), -1);
            if (array.length < MIN_FIELD_COUNT) {
                return;
            }

            String date = array[0];
            String time = array[1];
            String appId = array[3];
            String platform = array[4];
            String deviceModel = array[7];
            String countryCode = array[9];
            String ip = array[11];
            String imei = array[12];
            String androidId = array[14];
            String gaid = array[15];
            String idfa = array[16];
            String deviceBrand = array[17];
            String extSysid = array[18];
            // The length guard only guarantees indices 0..19. Reading index 21 unconditionally
            // threw ArrayIndexOutOfBoundsException for 20- and 21-field rows, so a missing
            // oaid column is treated as an empty oaid instead.
            String oaid = array.length > OAID_INDEX ? array[OAID_INDEX] : "";

            if (time.length() < 6) {
                context.getCounter("DMP", "adn time length error").increment(1L);
                return;
            }

            // Reformat "HHmmss..." into "<date> HH:mm:ss...".
            builder.setLength(0);
            builder.append(date)
                    .append(" ")
                    .append(time.substring(0, 2))
                    .append(":")
                    .append(time.substring(2, 4))
                    .append(":")
                    .append(time.substring(4));
            String requestTime = builder.toString();

            outValue.set(MRUtils.JOINER.join(requestTime, appId, ip, countryCode, deviceModel, deviceBrand));

            // In-house system id (added later than the other id types).
            String sysIdType = GetDevIdUtil.getExtSysId(extSysid);
            if (StringUtils.isNotBlank(sysIdType)) {
                outKey.set(MRUtils.JOINER.join(sysIdType, platform));
                context.write(outKey, outValue);
            }

            // gaid / idfa selection is delegated to GetDevIdUtil.
            String devIdType = GetDevIdUtil.getIdByIdAndPlatform(gaid, idfa, extSysid, platform);
            if (StringUtils.isNotBlank(devIdType)) {
                outKey.set(MRUtils.JOINER.join(devIdType, platform));
                context.write(outKey, outValue);
            }

            boolean isAndroid = ANDROID.equals(platform);

            if (isAndroid && !imei.isEmpty() && IMEI_PATTERN.matcher(imei).matches()) {
                outKey.set(MRUtils.JOINER.join(imei, "imei", platform));
                context.write(outKey, outValue);

                // Requirement from Zhuomeng (2020-03-16): also emit the imei as type "imeimd5".
                // Only the hashing is best-effort. The write stays outside the try block so
                // that I/O failures and interruption are no longer swallowed.
                String imeiMd5;
                try {
                    imeiMd5 = MD5Util.getMD5Str(imei);
                } catch (Exception e) {
                    imeiMd5 = null;
                }
                if (imeiMd5 == null) {
                    // A counter replaces the per-record stack trace on stderr.
                    context.getCounter("DMP", "adn imei md5 error").increment(1L);
                } else {
                    // platform equals "android" in this branch, so no lower-casing is needed.
                    outKey.set(MRUtils.JOINER.join(imeiMd5, "imeimd5", ANDROID));
                    context.write(outKey, outValue);
                }
            }

            if (isAndroid && !androidId.isEmpty() && ANDROID_ID_PATTERN.matcher(androidId).matches()) {
                outKey.set(MRUtils.JOINER.join(androidId, "androidid", ANDROID));
                context.write(outKey, outValue);
            }

            if (isAndroid
                    && !oaid.isEmpty()
                    && !CommonMapReduce.allZero.equals(oaid)
                    && !ALL_ZERO_DIGITS.matcher(oaid).matches()) {
                outKey.set(MRUtils.JOINER.join(oaid, "oaid", ANDROID));
                context.write(outKey, outValue);
            }
        }
    }

    /**
     * Reducer that writes one output row per incoming value: the group key and the value are
     * joined with {@code MRUtils.JOINER} and converted by
     * {@code BytesRefUtil.createRcOutValue}. No aggregation or de-duplication is done.
     */
    public static class MdsAdnRequestReducer extends Reducer<Text, Text, NullWritable, BytesRefArrayWritable> {
        /**
         * Emits every value of the group prefixed with the group key.
         *
         * @param key     group key produced by the mapper
         * @param values  all values that share {@code key}
         * @param context Hadoop context receiving the output rows
         * @throws IOException          if writing a row fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            final NullWritable emptyKey = NullWritable.get();
            for (Text record : values) {
                // The key is stringified per value, exactly as before.
                context.write(
                        emptyKey,
                        BytesRefUtil.createRcOutValue(
                                MRUtils.JOINER.join(key.toString(), record.toString())));
            }
        }
    }

    /**
     * Applies the output-side job settings: RCFile output format with
     * {@code NullWritable} / {@code BytesRefArrayWritable} output types, a column count of 9,
     * BLOCK compression type, and the hive-exec jar added to the classpath.
     *
     * @param job  job being configured
     * @param args command-line arguments; not used here
     * @throws Exception if adding the jar to the classpath fails
     */
    @Override
    protected void otherSetting(Job job, String[] args) throws Exception {
        // Output format and the key/value types it writes.
        job.setOutputFormatClass(RCFileOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(BytesRefArrayWritable.class);

        // Column count for the RCFile writer.
        // NOTE(review): presumably 3 key parts + 6 value fields from the mapper — confirm.
        job.getConfiguration().set("hive.io.rcfile.column.number.conf", "9");
        job.getConfiguration().set(
                "mapreduce.output.fileoutputformat.compress.type",
                SequenceFile.CompressionType.BLOCK.toString());

        // hive-exec 2.3.3 replaces the previously used hive-exec-0.13.1-amzn-3 jar.
        job.addFileToClassPath(new Path("s3://mob-emr-test/dataplatform/env/hive/lib/hive-exec-2.3.3.jar"));
    }

    /**
     * Points the job output at {@code args[1]} and turns on gzip-compressed output.
     *
     * @param job  job being configured
     * @param args command-line arguments; {@code args[1]} is the output directory
     * @throws IOException declared by the overridden method
     */
    @Override
    protected void setOutputPath(Job job, String[] args) throws IOException {
        Path outputDir = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputDir);

        // Compression: codec and the enable flag.
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        FileOutputFormat.setCompressOutput(job, true);
    }

    /**
     * Registers {@code args[0]} as the job's input path.
     *
     * @param job  job being configured
     * @param args command-line arguments; {@code args[0]} is the input location
     * @throws IOException if the path cannot be added
     */
    @Override
    protected void setInputPath(Job job, String[] args) throws IOException {
        Path inputDir = new Path(args[0]);
        FileInputFormat.addInputPath(job, inputDir);
    }
}