// AppidPackageDictMR.java 4.89 KB
package mobvista.dmp.datasource.setting;

import com.google.common.base.Joiner;
import com.google.common.collect.Maps;
import mobvista.dmp.common.CommonMapReduce;
import mobvista.dmp.common.CommonMapper;
import mobvista.dmp.common.CommonReducer;
import mobvista.dmp.util.MRUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * author: houying
 * date  : 17-2-10
 * desc  :
 */
/**
 * MapReduce job that builds an "appid,package_name,platform" dictionary from two
 * comma-separated text inputs.
 *
 * <p>Command-line arguments, as consumed by {@link #setInputPath} and {@link #setOutputPath}:
 * <ul>
 *   <li>{@code args[0]} - first input path</li>
 *   <li>{@code args[1]} - second input path</li>
 *   <li>{@code args[2]} - output path</li>
 * </ul>
 *
 * <p>The mapper recognises two row layouts (see {@link AppidPackageMapper}); the reducer keeps
 * one (package name, platform) pair per appid, preferring rows tagged as source "A"
 * (see {@link AppidPackageReducer}).
 *
 * author: houying
 * date  : 17-2-10
 */
public class AppidPackageDictMR extends CommonMapReduce {

    /**
     * Creates the job description and forwards it unchanged to {@link CommonMapReduce}.
     *
     * @param name         job name
     * @param mapperClass  mapper implementation to run
     * @param reducerClass reducer implementation to run
     */
    public AppidPackageDictMR(String name, Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass) {
        super(name, mapperClass, reducerClass);
    }

    /**
     * Registers {@code args[2]} as the job's output directory.
     *
     * @param job  job being configured
     * @param args command-line arguments; index 2 must be present
     * @throws IOException if Hadoop rejects the path
     */
    @Override
    protected void setOutputPath(Job job, String[] args) throws IOException {
        FileOutputFormat.setOutputPath(job, new Path(args[2]));
    }

    /**
     * Registers {@code args[0]} and {@code args[1]} as the job's input paths.
     *
     * @param job  job being configured
     * @param args command-line arguments; indexes 0 and 1 must be present
     * @throws IOException if Hadoop rejects a path
     */
    @Override
    protected void setInputPath(Job job, String[] args) throws IOException {
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));
    }

    /**
     * Entry point: hands a configured job instance and the raw arguments to
     * {@code CommonMapReduce.start}.
     *
     * @param args command-line arguments (two input paths followed by the output path)
     * @throws Exception whatever {@code start} propagates
     */
    public static void main(String[] args) throws Exception {
        start(new AppidPackageDictMR("appid package mapping", AppidPackageMapper.class, AppidPackageReducer.class), args);
    }

    /**
     * Parses one comma-separated line and emits {@code appid -> "<tag>,<package_name>,<platform>"}.
     *
     * <p>Two row layouts are accepted:
     * <ul>
     *   <li>tag "A" - exactly three columns: appid, package_name, platform (the original
     *       dictionary layout);</li>
     *   <li>tag "B" - four or more columns: appid in column 0, platform in column 1 and
     *       package_name in column 3.</li>
     * </ul>
     * Rows whose appid is not 5-10 digits, whose platform is not a single digit, or whose
     * package name is the literal string {@code null} are dropped.
     */
    public static class AppidPackageMapper extends CommonMapper {
        /** An appid is a run of 5 to 10 digits. */
        private static final Pattern APPID_PATTERN = Pattern.compile("[0-9]{5,10}");
        /** A platform code is exactly one digit. */
        private static final Pattern PLATFORM_PATTERN = Pattern.compile("\\d");
        private static final Joiner COMMA_JOINER = Joiner.on(",");

        // Reused across map() calls to avoid allocating a Text per record.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * Classifies the line as layout A or B and emits at most one record for it.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of input text
         * @param context task context used for output and metrics
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Limit of 5: anything after the fourth comma stays in the last column.
            String[] columns = value.toString().split(",", 5);
            if (columns.length < 3) {
                // NOTE(review): presumably bumps a job counter - confirm in CommonMapReduce.
                CommonMapReduce.setMetrics(context, "DMP", "column_num_error", 1);
                return;
            }

            // Both layouts carry the appid in column 0 and require it to be well-formed.
            String appid = columns[0];
            if (!APPID_PATTERN.matcher(appid).matches()) {
                return;
            }

            if (columns.length == 3) {
                // Layout A (original dictionary): appid, package_name, platform.
                if (!"null".equals(columns[1]) && PLATFORM_PATTERN.matcher(columns[2]).matches()) {
                    emit(context, appid, "A", columns[1], columns[2]);
                }
                // A three-column row that is not valid layout A must not be treated as
                // layout B: layout B reads column 3, which does not exist here. Reading it
                // used to throw ArrayIndexOutOfBoundsException for rows such as "12345,1,x".
                return;
            }

            // Layout B (4 or 5 columns): appid, platform, <ignored>, package_name[, rest].
            if (PLATFORM_PATTERN.matcher(columns[1]).matches() && !"null".equals(columns[3])) {
                emit(context, appid, "B", columns[3], columns[1]);
            }
        }

        /** Writes {@code appid -> "tag,packageName,platform"} through the reusable buffers. */
        private void emit(Context context, String appid, String tag, String packageName, String platform)
                throws IOException, InterruptedException {
            outKey.set(appid);
            outValue.set(COMMA_JOINER.join(tag, packageName, platform));
            context.write(outKey, outValue);
        }
    }

    /**
     * Collapses all mapper records of one appid into a single
     * {@code "appid,package_name,platform"} output line.
     *
     * <p>Selection rule: the first record tagged "A" wins and stops the scan; if there is no
     * "A" record, the first record that yields a non-blank package name and platform is kept.
     * Nothing is written when the selected pair has a blank member.
     */
    public static class AppidPackageReducer extends CommonReducer {
        private static final Joiner COMMA_JOINER = Joiner.on(",");

        // Reused across reduce() calls to avoid allocating a Text per key.
        private final Text outKey = new Text();

        /**
         * Picks the (package name, platform) pair for {@code key} and writes it as the output key
         * with a {@link NullWritable} value.
         *
         * @param key     the appid
         * @param values  mapper values of the form {@code "<tag>,<package_name>,<platform>"}
         * @param context task context used for output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {

            String pkgName = "";
            String platform = "";
            for (Text value : values) {
                String[] parts = value.toString().split(",");
                if (parts.length != 3) {
                    continue;
                }
                boolean fromSourceA = "A".equals(parts[0]);
                // Take this record if it is authoritative (source A) or nothing usable
                // has been captured yet.
                if (fromSourceA || StringUtils.isBlank(pkgName) || StringUtils.isBlank(platform)) {
                    pkgName = parts[1];
                    platform = parts[2];
                }
                if (fromSourceA) {
                    break;
                }
            }

            if (StringUtils.isNotBlank(pkgName) && StringUtils.isNotBlank(platform)) {
                outKey.set(COMMA_JOINER.join(key.toString(), pkgName, platform));
                context.write(outKey, NullWritable.get());
            }
        }
    }
}