package mobvista.dmp.datasource.age.mapreduce;

import mobvista.dmp.util.MRUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Created by liushuai on 2017/2/16.
 *
 * <p>MapReduce job that groups the lines of three input directories by their first
 * field and, for every record tagged {@code "B"}, emits one output line carrying the
 * age tags collected from the {@code "A"} records that share the same key.
 *
 * <p>Usage: {@code MergeInstallAgeMR [generic options] <input1> <input2> <input3> <output>}
 */
public class MergeInstallAgeMR {
    /** Number of positional arguments: three input paths followed by one output path. */
    private static final int REQUIRED_ARGS = 4;

    /**
     * Configures and submits the job, then terminates the JVM with exit code 0 on
     * success, 1 on job failure, or 2 when too few arguments were supplied.
     *
     * @param args Hadoop generic options followed by three input paths and one output path
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws IOException            propagated from option parsing or job submission
     * @throws InterruptedException   propagated from {@code Job.waitForCompletion}
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

        Configuration conf = new Configuration();
        // Tuning defaults are applied before GenericOptionsParser processes the command line.
        conf.set("mapreduce.map.speculative", "true");
        conf.set("mapreduce.reduce.speculative", "true");
        conf.set("mapreduce.task.io.sort.mb", "500");
        conf.set("mapreduce.reduce.java.opts", "-Xmx1536m");
        conf.set("mapreduce.reduce.memory.mb", "2048");
        conf.set("mapreduce.reduce.shuffle.parallelcopies", "50");
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
        if (otherArgs.length < REQUIRED_ARGS) {
            System.err.println("Usage: MergeInstallAgeMR <input1> <input2> <input3> <output>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "MergeInstallAgeMR");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        job.setJarByClass(MergeInstallAgeMR.class);

        job.setMapperClass(MergeInstallAgeMapper.class);

        job.setReducerClass(MergeInstallAgeReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
        FileInputFormat.addInputPath(job, new Path(otherArgs[2]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[3]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

    /**
     * Emits every input line unchanged, keyed by its first field as split by
     * {@code MRUtils.SPLITTER}.
     */
    public static class MergeInstallAgeMapper extends Mapper<LongWritable, Text, Text, Text> {
        /** Reused across calls to avoid allocating a new key per record. */
        private final Text outKey = new Text();

        /**
         * @param key     input key supplied by the input format (not used)
         * @param value   one input line
         * @param context sink for the (first field, whole line) pair
         */
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = MRUtils.SPLITTER.split(value.toString(), -1);
            outKey.set(fields[0]);
            context.write(outKey, value);
        }
    }

    /**
     * Joins the "A" (age) records of a key onto each of its "B" records.
     *
     * <p>According to the original author's notes, "A" records come from GetDspAgeMR or
     * GetGaAgeMR and "B" records come from ExtractDeviceMR, and the key is a device id.
     */
    public static class MergeInstallAgeReducer extends Reducer<Text, Text, Text, Text> {
        /** Value of the second field marking an age record. */
        private static final String TAG_AGE = "A";
        /** Value of the second field marking a record that must receive the age tags. */
        private static final String TAG_DEVICE = "B";
        /** Both record kinds are read up to index 3. */
        private static final int MIN_FIELDS = 4;
        private static final String COUNTER_GROUP = "MergeInstallAgeMR";
        private static final String COUNTER_MALFORMED = "MALFORMED_RECORDS";

        /** Reused across writes to avoid allocating a new value per record. */
        private final Text outValue = new Text();

        /**
         * Concatenates all "A" records of the key as {@code field2#field3} joined by
         * {@code $}, then writes one line per "B" record consisting of its third field,
         * a tab, the concatenated age tags (or the literal {@code null} when the key has
         * no "A" record), joined with its fourth field through {@code MRUtils.JOINER}.
         *
         * <p>Records with fewer than four fields are skipped and counted in the
         * {@code MergeInstallAgeMR/MALFORMED_RECORDS} counter. Records whose tag is
         * neither "A" nor "B" are ignored.
         *
         * @param key     grouping key (first field of every record in {@code values})
         * @param values  all lines that share {@code key}
         * @param context sink for the merged output lines
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            StringBuilder ageTags = new StringBuilder();
            // "B" records are buffered because the age tags are only complete once
            // every value of the key has been seen.
            List<String[]> deviceRecords = new ArrayList<String[]>();
            for (Text val : values) {
                String[] fields = MRUtils.SPLITTER.split(val.toString(), -1);
                if (fields.length < MIN_FIELDS) {
                    context.getCounter(COUNTER_GROUP, COUNTER_MALFORMED).increment(1);
                    continue;
                }
                if (TAG_AGE.equals(fields[1])) {
                    if (ageTags.length() > 0) {
                        ageTags.append("$");
                    }
                    ageTags.append(fields[2]).append("#").append(fields[3]);
                } else if (TAG_DEVICE.equals(fields[1])) {
                    deviceRecords.add(fields);
                }
            }

            // The literal "null" marks a key without any age tag.
            String ageColumn = ageTags.length() == 0 ? "null" : ageTags.toString();
            for (String[] fields : deviceRecords) {
                // Built fresh for every record: a buffer shared across iterations would
                // carry the previous records' text into each following output line.
                String packageAndAge = fields[2] + "\t" + ageColumn;
                outValue.set(MRUtils.JOINER.join(packageAndAge, fields[3]));
                context.write(key, outValue);
            }
        }
    }
}
