package mobvista.dmp.datasource.gender;

import com.google.common.collect.Sets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

public class MergeInstallGenderMR {
    /**
     * Job driver: based on the results of GetDspGenderMR and GetGaGenderMR, decides whether each
     * device_id produced by ExtractDeviceMR has a gender mark and tags it accordingly.
     *
     * <p>author: LiuShuai, date: 2017-01-18
     *
     * <p>After the generic Hadoop options have been stripped, four arguments are required:
     * three input paths followed by one output path. All three inputs go through the same mapper,
     * which tells records apart by their tag field, so the order of the input paths does not
     * matter to this job. Output is gzip-compressed text.
     *
     * <p>This method terminates the JVM: exit status 0 when the job succeeds, 1 when it fails,
     * and 2 when too few arguments are supplied.
     *
     * @param args command line arguments (generic Hadoop options, then the four paths)
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws IOException            propagated from option parsing, job setup or job execution
     * @throws InterruptedException   propagated from {@code Job.waitForCompletion}
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException {

        Configuration conf = new Configuration();
        conf.set("mapreduce.map.speculative", "true");
        conf.set("mapreduce.reduce.speculative", "true");
        conf.set("mapreduce.task.io.sort.mb", "500");
        conf.set("mapreduce.reduce.java.opts", "-Xmx1536m");
        conf.set("mapreduce.reduce.memory.mb", "2048");
        conf.set("mapreduce.reduce.shuffle.parallelcopies", "50");
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException when a path
        // is missing. Extra trailing arguments are still tolerated, as before.
        if (otherArgs.length < 4) {
            System.err.println("Usage: MergeInstallGenderMR <input1> <input2> <input3> <output>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "MergeInstallGenderMR");
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
        job.setJarByClass(MergeInstallGenderMR.class);

        job.setMapperClass(MergeInstallGenderMapper.class);

        job.setReducerClass(MergeInstallGenderReducer.class);

        // Map output types are not set separately, so these also apply to the map output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
        FileInputFormat.addInputPath(job, new Path(otherArgs[2]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[3]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

    public static class MergeInstallGenderMapper extends Mapper<LongWritable, Text, Text, Text> {
        Text outKey = new Text();

        public void map(LongWritable key, Text value, Context context) {
            try {
                String line = value.toString();
                String[] fields = MRUtils.SPLITTER.split(line, -1);
                if (fields[1].equals("A")) {//dsp或ga
                    outKey.set(MRUtils.JOINER.join(fields[0], fields[4]));//device_id,device_type
                    context.write(outKey, value);
                } else if (fields[1].equals("B")) {//extract
                    outKey.set(MRUtils.JOINER.join(fields[0], fields[3]));//device_id,device_type
                    context.write(outKey, value);
                }
            } catch (Exception e) {
                return;
            }
        }
    }

    public static class MergeInstallGenderReducer extends Reducer<Text, Text, Text, Text> {
        Text outValue = new Text();

        public void reduce(Text key, Iterable<Text> values, Context context) {
            try {
                List<String> outList = new ArrayList<String>();//因为没有去重，申明一个List来临时存储value
                Set<String> genderSet = Sets.newHashSet();
                for (Text val : values) {
                    String value = val.toString();
                    String[] fields = MRUtils.SPLITTER.split(value, -1);
                    if (fields[1].equals("A")) {//说明来源于GetDspGenderMR或GetGaGenderMR
                        genderSet.add(fields[2]);  //性别
                    } else if (fields[1].equals("B")) {//说明来源于extract_device
                        outList.add(value);//加到list里
                    }
                }
                if (outList.size() > 0) {
                    for (String value : outList) {
                        String[] fields = MRUtils.SPLITTER.split(value, -1);
                        if (genderSet.size() != 1) {//device_id下没有性别标记或者多个不同性别标记
                            outValue.set(MRUtils.JOINER.join(fields[2],  //pkg  以#分隔
                                    "null", fields[3] + "#" + fields[4]  //device_type # update_date
                            ));
                            context.write(key, outValue);
                        } else {//device_id下有性别标记
                            String gender = "";
                            for (String gen : genderSet) {
                                gender = gen;
                            }
                            outValue.set(MRUtils.JOINER.join(fields[2],  //pkg
                                    gender, fields[3] + "#" + fields[4]   //device_type # update_date
                            ));//打上性别标签
                            context.write(key, outValue);
                        }
                    }
                }
            } catch (Exception e) {
                return;
            }
        }
    }
}
