package mobvista.dmp.datasource.setting;

import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Maps;
import mobvista.dmp.common.Constants;
import mobvista.dmp.util.MRUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Map;

/**
 * MapReduce job that extracts the query parameters of nginx "setting" requests
 * ({@code /setting?...} and {@code /appwall/setting?...}) and writes them out as
 * one delimited record per request.
 *
 * <p>Arguments: {@code args[0]} and {@code args[1]} are input paths, {@code args[2]}
 * is the output path. An existing output path is deleted before the job starts.
 * No reducer class is configured by this driver.
 *
 * author: houying
 * date  : 17-2-10
 */
public class NginxSettingMR extends Configured implements Tool {
    private static final Logger logger = LoggerFactory.getLogger(NginxSettingMR.class);

    /**
     * Configures and runs the job.
     *
     * @param args {@code [input path 1, input path 2, output path]}
     * @return 0 when the job succeeds, 1 when it fails, 2 when too few arguments are given
     * @throws Exception propagated from job setup or execution
     */
    @Override
    public int run(String[] args) throws Exception {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 3) {
            logger.error("Usage: NginxSettingMR <input path 1> <input path 2> <output path>");
            return 2;
        }

        Configuration conf = getConf();
        conf.set("mapreduce.map.speculative", "true");
        conf.set("mapreduce.reduce.speculative", "true");
        conf.set("mapreduce.task.io.sort.mb", "500");
        conf.set("mapreduce.reduce.java.opts", "-Xmx1536m");
        conf.set("mapreduce.reduce.memory.mb", "2048");
        conf.set("mapreduce.reduce.shuffle.parallelcopies", "50");
        Job job = Job.getInstance(conf, "nginx setting job");
        job.setJarByClass(this.getClass());

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));
        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        // Remove a stale output directory so the job does not abort on an existing path.
        Path outputPath = FileOutputFormat.getOutputPath(job);
        FileSystem fileSystem = outputPath.getFileSystem(job.getConfiguration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        job.setMapperClass(NginxSettingMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Turns one log line into one output record.
     *
     * <p>Two line formats are recognised: a line starting with {@code [} that contains
     * {@code req_info}, whose second tab-separated field is parsed as JSON and whose
     * {@code req_info} entry is taken as the request URI; and any other line, where the
     * URI is the text between {@code "GET "} and the last {@code " HTTP/"}.
     *
     * <p>Known query parameters are placed at fixed column positions (see the constructor);
     * all other parameters are collected into the last column ({@code prop}) as
     * {@code key=value} pairs joined with {@code &}. Columns are joined with {@code ","}
     * through {@code MRUtils.join}.
     */
    public static class NginxSettingMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

        private static final String SETTING_PREFIX = "/setting?";
        private static final String APPWALL_SETTING_PREFIX = "/appwall/setting?";

        /** Query parameter name -> column index in the output record. */
        private final Map<String, Integer> table;
        /** Reused output key to avoid allocating a Text per record. */
        private final Text outValue = new Text();

        public NginxSettingMapper() {
            table = Maps.newHashMap();
            table.put("app_id", 0);
            table.put("platform", 1);
            table.put("os_version", 2);
            table.put("package_name", 3);
            table.put("app_version_name", 4);
            table.put("app_version_code", 5);
            table.put("orientation", 6);
            table.put("model", 7);
            table.put("android_id", 8);
            table.put("imei", 9);
            table.put("gaid", 10);
            table.put("mnc", 11);
            table.put("mcc", 12);
            table.put("brand", 13);
            table.put("language", 14);
            table.put("useragent", 15);
            table.put("network_type", 16);
            table.put("timezone", 17);
            table.put("sdk_version", 18);
            // Must stay the highest index: map() writes the leftover parameters into the last column.
            table.put("prop", 19);
        }

        /**
         * Emits one record for every line that carries a setting request; all other lines are skipped.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one log line
         * @param context used to emit the record as key with a {@link NullWritable} value
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String uri = extractUri(value.toString());
            // The original condition chained these checks with "&&", which only ever rejected
            // blank URIs and let every non-setting request through (and threw on a null URI).
            // NOTE(review): assumes req_info holds the same path-and-query form as the
            // access-log request line - confirm against real JSON-format logs.
            if (!isSettingUri(uri)) {
                return;
            }
            String query = uri.substring(uri.indexOf("?") + 1);
            Map<String, String> keyValues = splitToMap(query, "&", "=");
            String[] ret = new String[table.size()];
            Map<String, String> prop = Maps.newHashMap();
            for (Map.Entry<String, String> entry : keyValues.entrySet()) {
                Integer index = table.get(entry.getKey());
                if (index == null) {
                    // Unknown parameter: keep it in the catch-all column.
                    prop.put(entry.getKey(), entry.getValue());
                } else {
                    ret[index] = entry.getValue();
                }
            }
            ret[ret.length - 1] = MRUtils.joinMapToString(prop, "&", "=");
            outValue.set(MRUtils.join(ret, ","));
            context.write(outValue, NullWritable.get());
        }

        /** Returns the request URI found in {@code line}, or {@code null} when none can be extracted. */
        private static String extractUri(String line) {
            if (line.startsWith("[") && line.contains("req_info")) {
                String[] split = line.split("\t");
                if (split.length < 2) {
                    return null;
                }
                JSONObject jsonObject = Constants.String2JSONObject(split[1]);
                // Guard against a null result; the helper's contract is not visible here.
                if (jsonObject == null || !jsonObject.containsKey("req_info")) {
                    return null;
                }
                return jsonObject.getString("req_info");
            }
            int start = line.indexOf("GET ");
            if (start < 0) {
                return null;
            }
            start += 4;
            int end = line.lastIndexOf(" HTTP/");
            if (end < 0) {
                return null;
            }
            if (end <= start) {
                // "GET " appears after (or overlapping) the last " HTTP/": log the odd line and skip it.
                logger.info(line);
                return null;
            }
            return line.substring(start, end);
        }

        /** Returns true when {@code uri} is non-blank and targets one of the two setting endpoints. */
        private static boolean isSettingUri(String uri) {
            if (StringUtils.isBlank(uri)) {
                return false;
            }
            return uri.startsWith(SETTING_PREFIX) || uri.startsWith(APPWALL_SETTING_PREFIX);
        }

        /**
         * Splits {@code uri} on {@code sep} and each piece on {@code keyValueSep}.
         * Pieces that do not split into exactly two parts are skipped, so parameters with an
         * empty value or with the key/value separator inside the value are dropped.
         * Both separators are passed to {@link String#split(String)} and are therefore regexes.
         */
        private static Map<String, String> splitToMap(String uri, String sep, String keyValueSep) {
            Map<String, String> map = Maps.newHashMap();
            for (String keyValue : uri.split(sep)) {
                String[] array = keyValue.split(keyValueSep);
                if (array.length != 2) {
                    continue;
                }
                map.put(array[0], array[1]);
            }
            return map;
        }

    }

    /** Command-line entry point; exits with the status returned by {@link #run(String[])}. */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new NginxSettingMR(), args));
    }
}
