package mobvista.dmp.datasource.apptag.mapreduce;

import com.google.common.base.Strings;
import com.google.common.collect.Maps;
import mobvista.dmp.util.MRUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.JavaType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.*;

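/**
 * author: houying
 * date  : 16-10-14
 * desc  : MapReduce job that reads app-info records from two input paths, resolves each
 *         record's category codes against the category files shipped through the
 *         distributed cache, and writes one line per app containing its platform
 *         ("ios" or "adr") and a JSON array of the matched categories.
 */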
public class MergeAppInfoMRv2 extends Configured implements Tool {
    private static final ObjectMapper objectMapper = new ObjectMapper();

    /**
     * Value holder for one row of the category dictionary: the category id plus
     * the first and (optional) second category, each with its sub-category.
     */
    public static class Category {
        /** Identifier of the category row. */
        String id;
        /** First category label. */
        String mvFirstCategory;
        /** Sub-category belonging to the first category. */
        String mvFirstSubCategory;
        /** Second category label; may be empty when the row has only one category. */
        String mvSecondCategory;
        /** Sub-category belonging to the second category. */
        String mvSecondSubCategory;

        /**
         * Creates a category row.
         *
         * @param key                 stored as the category id
         * @param mvFirstCategory     first category label
         * @param mvFirstSubCategory  sub-category of the first category
         * @param mvSecondCategory    second category label
         * @param mvSecondSubCategory sub-category of the second category
         */
        public Category(String key,
                        String mvFirstCategory,
                        String mvFirstSubCategory,
                        String mvSecondCategory,
                        String mvSecondSubCategory) {
            this.id = key;
            this.mvFirstCategory = mvFirstCategory;
            this.mvFirstSubCategory = mvFirstSubCategory;
            this.mvSecondCategory = mvSecondCategory;
            this.mvSecondSubCategory = mvSecondSubCategory;
        }

        /** @return the category id */
        public String getId() {
            return this.id;
        }

        /** @return the first category label */
        public String getMvFirstCategory() {
            return this.mvFirstCategory;
        }

        /** @return the sub-category of the first category */
        public String getMvFirstSubCategory() {
            return this.mvFirstSubCategory;
        }

        /** @return the second category label */
        public String getMvSecondCategory() {
            return this.mvSecondCategory;
        }

        /** @return the sub-category of the second category */
        public String getMvSecondSubCategory() {
            return this.mvSecondSubCategory;
        }
    }

    public static class MergeAppInfoMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        private final static Logger logger = LoggerFactory.getLogger(MergeAppInfoMapper.class);
        private JavaType listStringType;
        private JavaType listMapType;
        private Map<String, Category> categoryMap;
        private Text outKey = new Text();
        private int exceptionCount;

        /**
         * Prints, on standard output, how many records handled by this task matched
         * neither the iOS nor the Android branch of {@code map}.
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            final String summary = "exception count: " + exceptionCount;
            System.out.println(summary);
        }

        /**
         * Prepares the Jackson type descriptors and loads the category dictionary from
         * every file registered in the job's distributed cache.
         *
         * <p>Each dictionary line is split with {@code MRUtils.SPLITTER}; lines that do not
         * yield exactly 10 columns are logged and skipped. Column 5 is the lookup key
         * (tag code); columns 0 and 6-9 populate the {@link Category} value. When the same
         * key appears more than once the last occurrence wins.
         *
         * @param context task context supplying the cache file URIs and the configuration
         * @throws IOException if a cache file cannot be opened or read
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            listStringType = objectMapper.getTypeFactory().constructCollectionType(List.class, String.class);
            JavaType mapType = objectMapper.getTypeFactory().constructMapType(HashMap.class, String.class, String.class);
            listMapType = objectMapper.getTypeFactory().constructCollectionType(List.class, mapType);
            exceptionCount = 0;
            categoryMap = Maps.newHashMap();

            // getCacheFiles() yields null (not an empty array) when nothing was registered.
            URI[] cacheFiles = context.getCacheFiles();
            if (cacheFiles == null) {
                logger.warn("no category cache files registered; every category lookup will miss");
                return;
            }

            for (URI uri : cacheFiles) {
                // FileSystem.get hands back a shared instance, so only the stream is closed here.
                FileSystem fileSystem = FileSystem.get(uri, context.getConfiguration());
                // try-with-resources guarantees the stream is released even if reading fails;
                // the charset is pinned so decoding does not depend on the task JVM's default.
                try (BufferedReader reader = new BufferedReader(
                        new InputStreamReader(fileSystem.open(new Path(uri)), StandardCharsets.UTF_8))) {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        String[] array = MRUtils.SPLITTER.split(line, -1);
                        if (array.length != 10) {
                            logger.info("size of line is {}: [{}]", array.length, line);
                            continue;
                        }
                        Category category = new Category(array[0], array[6], array[7], array[8], array[9]);
                        // tag_code is the key; [id, first_tag, second_tag] form the value
                        categoryMap.put(array[5], category);
                    }
                }
            }
        }

        /**
         * Builds the map that is serialised for one matched category.
         *
         * @param id          stored under key {@code "id"}
         * @param category    stored under key {@code "1"}
         * @param subCategory stored under key {@code "2"} only when it is neither null nor empty
         * @return a new mutable map holding the entries described above
         */
        private Map<String, String> createCategoryMap(String id, String category, String subCategory) {
            final Map<String, String> entry = new HashMap<>();
            entry.put("id", id);
            entry.put("1", category);
            final boolean hasSubCategory = subCategory != null && !subCategory.isEmpty();
            if (hasSubCategory) {
                entry.put("2", subCategory);
            }
            return entry;
        }

        /**
         * Translates one app-info record into an output line made of the record's first two
         * columns, the platform marker ({@code "ios"} or {@code "adr"}) and the JSON form of
         * the matched categories, joined with {@code MRUtils.JOINER}.
         *
         * <p>The platform is derived from the input file path: paths containing
         * {@code /dim_app_info_ios/} (with at least 10 columns, category ids as a JSON string
         * array in column 9) are iOS; paths containing {@code /dim_app_info_adr/} (with at
         * least 3 columns, a JSON array of objects carrying a {@code "code"} entry in column 2)
         * are Android. Anything else only increments the exception counter.
         *
         * @param key     not used
         * @param value   one delimited input record
         * @param context task context used for path resolution and output
         * @throws IOException if the input file cannot be determined, the JSON column is
         *                     malformed, or writing the output fails
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String filePath = resolveInputPath(context);
            if (filePath == null) {
                // Fail with an explicit message instead of an anonymous NullPointerException.
                throw new IOException("unable to determine the input file of the current split: "
                        + "'map.input.file' is not set and the split is not a FileSplit");
            }

            String[] array = MRUtils.SPLITTER.split(value.toString(), -1);
            if (filePath.contains("/dim_app_info_ios/") && array.length >= 10) { // ios
                List<String> categoryIdList = objectMapper.readValue(array[9], listStringType);
                Set<Map<String, String>> categories = new HashSet<>();
                if (categoryIdList != null) { // a JSON literal null deserialises to a null list
                    for (String id : categoryIdList) {
                        addCategoryTags(categories, id);
                    }
                }
                emit(context, array[0], array[1], "ios", categories);
            } else if (filePath.contains("/dim_app_info_adr/") && array.length >= 3) { // adr
                List<Map<String, String>> categoryList = objectMapper.readValue(array[2], listMapType);
                Set<Map<String, String>> categories = new HashSet<>();
                if (categoryList != null) { // a JSON literal null deserialises to a null list
                    for (Map<String, String> entry : categoryList) {
                        String code = entry == null ? null : lastPathSegment(entry.get("code"));
                        if (code == null) {
                            logger.info("skipping category entry without a usable code: {}", entry);
                            continue;
                        }
                        addCategoryTags(categories, code);
                    }
                }
                emit(context, array[0], array[1], "adr", categories);
            } else {
                exceptionCount++;
            }
        }

        /**
         * Returns the path of the file the current record comes from, or null when unknown.
         * The {@code map.input.file} property is consulted first, exactly as before.
         * NOTE(review): that property appears not to be populated for every input format under
         * the new MapReduce API, hence the fallback to the split's own path - confirm on the
         * target cluster.
         */
        private String resolveInputPath(Context context) {
            String filePath = context.getConfiguration().get("map.input.file");
            if (filePath == null && context.getInputSplit() instanceof FileSplit) {
                filePath = ((FileSplit) context.getInputSplit()).getPath().toString();
            }
            return filePath;
        }

        /**
         * Strips any path-like prefix from a category code, keeping only the text after the
         * last {@code '/'}. Returns null for a null code or one that consists solely of
         * slashes (for which the split produces no segments at all).
         */
        private String lastPathSegment(String code) {
            if (code == null) {
                return null;
            }
            String[] segments = code.split("\\/");
            return segments.length == 0 ? null : segments[segments.length - 1];
        }

        /**
         * Looks the code up in the category dictionary and adds the first category and, when
         * present, the second category to {@code target}. Unknown codes are logged and ignored.
         */
        private void addCategoryTags(Set<Map<String, String>> target, String code) {
            Category cate = categoryMap.get(code);
            if (cate == null) {
                logger.info("did not found mvFirstCategory for code: {}", code);
                return;
            }
            target.add(createCategoryMap(cate.getId(), cate.getMvFirstCategory(), cate.getMvFirstSubCategory()));
            if (!Strings.isNullOrEmpty(cate.getMvSecondCategory())) {
                target.add(createCategoryMap(cate.getId(), cate.getMvSecondCategory(), cate.getMvSecondSubCategory()));
            }
        }

        /** Serialises the categories to JSON, joins the output columns and writes the record. */
        private void emit(Context context, String first, String second, String platform,
                          Set<Map<String, String>> categories) throws IOException, InterruptedException {
            outKey.set(MRUtils.JOINER.join(
                    first,
                    second,
                    platform,
                    objectMapper.writeValueAsString(categories)
            ));
            context.write(outKey, NullWritable.get());
        }

    }

    /**
     * Command-line entry point: runs this tool through {@link ToolRunner} and exits the JVM
     * with the status code the tool returns.
     *
     * @param args command-line arguments handed to {@code ToolRunner.run}
     * @throws Exception whatever the tool run throws
     */
    public static void main(String[] args) throws Exception {
        final int exitCode = ToolRunner.run(new Configuration(), new MergeAppInfoMRv2(), args);
        System.exit(exitCode);
    }

    /**
     * Configures and runs the "merge app tag" job.
     *
     * <p>Expected arguments:
     * <ol>
     *   <li>{@code args[0]} - first input path</li>
     *   <li>{@code args[1]} - second input path</li>
     *   <li>{@code args[2]} - output path; deleted recursively first if it already exists</li>
     *   <li>{@code args[3]} - directory whose files are registered as distributed-cache files</li>
     * </ol>
     *
     * @param args job arguments as listed above
     * @return 0 when the job succeeds, 1 when it fails, 2 when too few arguments were supplied
     * @throws Exception on file-system or job-submission errors
     */
    @Override
    public int run(String[] args) throws Exception {
        // Report a usage error instead of failing later with ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 4) {
            System.err.println("Usage: MergeAppInfoMRv2 <input1> <input2> <output> <categoryDir>");
            return 2;
        }

        Configuration conf = getConf();

        Job job = Job.getInstance(conf, "merge app tag");
        job.setJarByClass(MergeAppInfoMRv2.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.addInputPath(job, new Path(args[1]));

        Path outputPath = new Path(args[2]);
        FileSystem fileSystem = outputPath.getFileSystem(conf);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        FileOutputFormat.setCompressOutput(job, false);

        Path category = new Path(args[3]);
        for (FileStatus status : category.getFileSystem(conf).listStatus(category)) {
            // Directories cannot be opened as streams by the mapper's setup; skip them.
            if (!status.isFile()) {
                continue;
            }
            // Path.toUri() keeps the path's own URI, avoiding a re-parse of the unescaped string form.
            job.addCacheFile(status.getPath().toUri());
        }

        job.setMapperClass(MergeAppInfoMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setNumReduceTasks(1);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
