package mobvista.dmp.datasource.ga.mapreduce;


import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;


public class GaDeviceDailyMR extends Configured implements Tool {

    /**
     * Command-line entry point: runs this tool through {@link ToolRunner} with a
     * fresh {@link Configuration} and terminates the JVM with the tool's exit code.
     *
     * @param args command-line arguments, forwarded unchanged to {@link ToolRunner}
     * @throws Exception if the tool run fails
     */
    public static void main(String[] args) throws Exception {
        final int exitCode = ToolRunner.run(new Configuration(), new GaDeviceDailyMR(), args);
        System.exit(exitCode);
    }

    /**
     * Configures and runs the "Ga device daily" MapReduce job.
     *
     * <p>Any existing output directory is deleted recursively before the job is
     * submitted, and the job output is written as gzip-compressed text.
     *
     * @param strings tool arguments: {@code strings[0]} is the input path,
     *                {@code strings[1]} is the output path
     * @return {@code 0} if the job completed successfully, {@code 1} if it failed,
     *         {@code 2} if fewer than two arguments were supplied
     * @throws Exception if job setup, file system access or job execution fails
     */
    @Override
    public int run(String[] strings) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException
        // when the input/output paths are missing.
        if (strings == null || strings.length < 2) {
            System.err.println("Usage: GaDeviceDailyMR <input path> <output path>");
            ToolRunner.printGenericCommandUsage(System.err);
            return 2;
        }

        Configuration conf = getConf();
        // Job-level tuning: map-side sort buffer, reducer heap / container size
        // and the number of parallel shuffle copies.
        conf.set("mapreduce.task.io.sort.mb", "500");
        conf.set("mapreduce.reduce.java.opts", "-Xmx1536m");
        conf.set("mapreduce.reduce.memory.mb", "2048");
        conf.set("mapreduce.reduce.shuffle.parallelcopies", "50");

        Job job = Job.getInstance(conf, "Ga device daily");
        job.setJarByClass(GaDeviceDailyMR.class);

        Path inputPath = new Path(strings[0]);
        Path outputPath = new Path(strings[1]);
        FileInputFormat.addInputPath(job, inputPath);

        // The job refuses to start if the output directory already exists,
        // so remove any previous output first.
        FileSystem fileSystem = outputPath.getFileSystem(conf);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(GaMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(GaReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Reducer that emits at most one normalized, tab-separated record per device id.
     *
     * <p>Each incoming value is a pipe-separated line. The first value of a key
     * that has at least {@link #MIN_FIELD_COUNT} columns is normalized (device
     * type, platform, country code, gender, defaults for empty columns) and
     * written; all other values for that key are ignored.
     */
    public static class GaReducer extends Reducer<Text, Text, Text, Text> {
        /** Number of pipe-separated columns a record needs (indices 0..30 are read). */
        private static final int MIN_FIELD_COUNT = 31;
        private static final Pattern SPLIT_PATTERN = Pattern.compile("\\|");
        /** Device ids made only of digits, upper-case A-F and dashes are treated as iOS. */
        private static final Pattern IDFA_PATTERN = Pattern.compile("^[0-9A-F\\-]+$");
        private static final Joiner TAB_JOINER = Joiner.on("\t");

        private final Set<String> platformSet = Sets.newHashSet("ios", "android");
        /** Maps raw gender values to "F" / "M"; populated in {@link #setup}. */
        private Map<String, String> genderMap;

        /**
         * Builds the lookup table used to normalize gender values.
         *
         * @param context the task context (unused)
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            genderMap = Maps.newHashMap();
            genderMap.put("F", "F");
            genderMap.put("FEMALE", "F");
            genderMap.put("X", "F");
            genderMap.put("M", "M");
            genderMap.put("MALE", "M");
            genderMap.put("Y", "M");
        }

        /**
         * Writes the normalized form of the first usable record for {@code key}.
         *
         * @param key     the device id
         * @param values  the raw pipe-separated lines for this device id
         * @param context the task context the output record is written to
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            String deviceId = key.toString();
            for (Text value : values) {
                // Limit -1 keeps trailing empty columns; without it a complete record
                // whose last column(s) are empty looks too short and is dropped.
                String[] fields = SPLIT_PATTERN.split(value.toString(), -1);
                if (fields.length < MIN_FIELD_COUNT) {
                    continue;
                }
                List<String> fieldList = buildOutputFields(fields, deviceId);
                context.write(key, new Text(TAB_JOINER.join(fieldList)));
                // Only the first usable record per device id is emitted.
                return;
            }
        }

        /** Converts one raw record into the ordered list of output columns. */
        private List<String> buildOutputFields(String[] fields, String deviceId) {
            List<String> fieldList = Lists.newArrayListWithExpectedSize(MIN_FIELD_COUNT);

            // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
            String platform = fixPlatform(fields[1].toLowerCase(Locale.ROOT), deviceId);
            switch (platform) {
                case "ios":
                    fieldList.add("idfa");
                    break;
                case "android":
                    fieldList.add("gaid");
                    break;
                default:
                    fieldList.add("unknown");
                    break;
            }
            fieldList.add(platformSet.contains(platform) ? platform : "unknown");

            // Original note (translated): "change UK to GB - requested by Mingyuan,
            // modified by Feng Liang, 2017-08-15".
            // NOTE(review): the code performs the opposite mapping (GB -> UK). The
            // behavior is kept as-is; confirm which direction downstream expects.
            String countryCode = fields[2];
            if ("GB".equalsIgnoreCase(countryCode)) {
                countryCode = "UK";
            }
            fieldList.add(countryCode);
            fieldList.add(fields[3]);

            // Columns 4..16 fall back to "0" when empty.
            for (int i = 4; i <= 16; i++) {
                fieldList.add(nullToDefault(fields[i], "0"));
            }
            fieldList.add(nullToDefault(fields[17], "0.0"));
            fieldList.add(nullToDefault(fields[18], "0"));
            fieldList.add(nullToDefault(fields[19], "0.0"));
            fieldList.add(nullToDefault(fields[20], "0"));
            fieldList.add(nullToDefault(fields[21], "0.0"));
            fieldList.add(fields[22]);
            fieldList.add(fields[23]);
            fieldList.add(standardizingGender(fields[24])); // gender
            fieldList.add(fields[25]); // birth_year
            // Columns 26..29 fall back to "0.0" when empty.
            for (int i = 26; i <= 29; i++) {
                fieldList.add(nullToDefault(fields[i], "0.0"));
            }
            fieldList.add(fields[30]);
            return fieldList;
        }

        /** Returns "ios" when the device id matches the IDFA pattern, else the given platform. */
        private String fixPlatform(String platform, String deviceId) {
            if (IDFA_PATTERN.matcher(deviceId).matches()) {
                return "ios";
            }
            return platform;
        }

        /** Returns the normalized gender, or the input unchanged when it has no mapping. */
        private String standardizingGender(String gender) {
            String g = genderMap.get(gender);
            return g == null ? gender : g;
        }

        /** Returns {@code defaultValue} when {@code field} is null or empty, else {@code field}. */
        private String nullToDefault(String field, String defaultValue) {
            return Strings.isNullOrEmpty(field) ? defaultValue : field;
        }
    }

    public static class GaMapper extends Mapper<LongWritable, Text, Text, Text> {
        private static final Logger logger = LoggerFactory.getLogger(GaMapper.class);
        private static int exceptionCount = 0;

        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            int index = value.toString().indexOf("|");
            if (index >= 0) {
                String deviceId = value.toString().substring(0, index);
                if ("(null)".equals(deviceId)) {
                    return;
                }
                context.write(new Text(deviceId), value);
            } else {
                exceptionCount += 1;
            }
        }

        protected void cleanup(Context context) throws IOException, InterruptedException {
            if (exceptionCount > 0) {
                logger.info("this mapper has occurred exception {} times", exceptionCount);
            }
        }
    }
}