package mobvista.prd.datasource.table;

import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.JavaType;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Created by Administrator on 2017/3/28 0028.
 * <p>
 * MapReduce job that looks up the URL and category of every package name (pkg_name)
 * found in the request logs and writes them next to the original record columns.
 * <p>
 * Two dictionary locations must be supplied through the job configuration
 * (for example with {@code -D iosFile=... -D adrFile=...}):
 * <ul>
 *   <li>{@code iosFile} - tab-separated dictionary read with id column 0, url column 11, category column 6</li>
 *   <li>{@code adrFile} - tab-separated dictionary read with id column 0, url column 5, category column 2</li>
 * </ul>
 * The remaining command-line arguments are the input path and the output path.
 */
public class AppUrlMR {
    /** Counter group shared by every counter this job reports. */
    private static final String COUNTER_GROUP = "DMP";

    /** Column that {@link #getAppUrl} probes for a JSON category list containing "name". */
    private static final int JSON_CATEGORY_COLUMN = 2;

    /** Column used as the URL when the JSON category list is used. */
    private static final int JSON_URL_COLUMN = 5;

    /**
     * Configures and submits the job, then exits with 0 on success and 1 on failure.
     *
     * @param args generic Hadoop options followed by {@code <input path> <output path>}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException below.
            System.err.println(
                    "Usage: AppUrlMR [-D iosFile=<path> -D adrFile=<path>] <input path> <output path>");
            System.exit(2);
        }
        conf.set("mapreduce.reduce.memory.mb", "1472");

        // Name the job and locate the jar after this class (previously copy-pasted from other jobs).
        Job job = Job.getInstance(conf, "AppUrlMR");
        job.setJarByClass(AppUrlMR.class);
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

        job.setMapperClass(MergeAppMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    /**
     * Joins each input record with the package dictionaries loaded in {@link #setup}.
     * <p>
     * Output key is column 0 of the record. Output value is
     * {@code column1 \t packageName \t url \t category \t column2}; url and category are
     * empty strings when the package is in neither dictionary.
     */
    public static class MergeAppMapper extends Mapper<LongWritable, Text, Text, Text> {
        /** Column of a DSP record that holds the JSON array of package names. */
        private static final int DSP_PACKAGE_LIST_COLUMN = 10;

        /** Column of an ADN SDK record that holds the package name. */
        private static final int ADN_PACKAGE_COLUMN = 4;

        Map<String, String> dspPackageMap = Maps.newHashMap();
        Map<String, String> iosPackageMap = Maps.newHashMap();
        Text outKey = new Text();
        Text outValue = new Text();
        ObjectMapper objectMapper = new ObjectMapper();
        JavaType listType = objectMapper.getTypeFactory().constructCollectionType(ArrayList.class, String.class);

        /** True once the input path of this task has been classified. */
        private boolean sourceResolved;
        /** True when the task reads a path containing "/etl_dsp_request_daily/". */
        private boolean dspInput;
        /** True when the task reads a path containing "/etl_adn_sdk_request_daily/" (and is not DSP). */
        private boolean adnInput;

        /**
         * Loads both package dictionaries into memory and classifies the task's input path.
         *
         * @throws IOException if a dictionary location is not configured or cannot be read
         */
        @Override
        public void setup(Context context) throws IOException, InterruptedException {
            // Read the dictionary files (output of CalcPackageDictMR) into maps.
            iosPackageMap = getAppUrl(requiredPath(context, "iosFile"), 0, 11, 6, context);
            dspPackageMap = getAppUrl(requiredPath(context, "adrFile"), 0, 5, 2, context);
            resolveSource(context);
        }

        /**
         * Emits one output record per package name found in the input record.
         * Records that are too short or whose package list is not valid JSON are counted
         * and skipped; failures while writing output are propagated.
         */
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            if (!sourceResolved) {
                // Defensive: keeps map() usable even if setup() was bypassed.
                resolveSource(context);
            }
            String[] fields = value.toString().split("\t", -1);

            if (dspInput) { // DSP
                if (fields.length <= DSP_PACKAGE_LIST_COLUMN) {
                    context.getCounter(COUNTER_GROUP, "malformed record").increment(1L);
                    return;
                }
                List<String> packageNameList;
                try {
                    // Convert the JSON array into a list.
                    packageNameList = objectMapper.readValue(fields[DSP_PACKAGE_LIST_COLUMN], listType);
                } catch (IOException e) {
                    // Jackson reports parse and mapping problems as IOException subclasses.
                    context.getCounter(COUNTER_GROUP, "json parse error").increment(1L);
                    return;
                }
                if (packageNameList == null) {
                    // The column held the JSON literal null: nothing to emit.
                    return;
                }
                for (String packageName : packageNameList) {
                    emit(fields, packageName, context);
                }
            } else if (adnInput) { // M system
                if (fields.length <= ADN_PACKAGE_COLUMN) {
                    context.getCounter(COUNTER_GROUP, "malformed record").increment(1L);
                    return;
                }
                emit(fields, fields[ADN_PACKAGE_COLUMN], context);
            } else {
                // Input path matches neither known source; counter name kept for compatibility.
                context.getCounter("DMP", "fields.length error").increment(1L);
            }
        }

        /** Writes one joined record, preferring the Android/DSP dictionary over the iOS one. */
        private void emit(String[] fields, String packageName, Context context)
                throws IOException, InterruptedException {
            String urlAndCategory = dspPackageMap.get(packageName);
            if (urlAndCategory == null) {
                urlAndCategory = iosPackageMap.get(packageName);
            }
            if (urlAndCategory == null) {
                // Unknown package: empty url and empty category.
                urlAndCategory = "\t";
            }
            outKey.set(fields[0]);
            outValue.set(fields[1] + "\t" + packageName + "\t" + urlAndCategory + "\t" + fields[2]);
            context.write(outKey, outValue);
        }

        /** Returns the configured value for {@code confKey} or fails with a message naming the key. */
        private String requiredPath(Context context, String confKey) throws IOException {
            String path = context.getConfiguration().get(confKey);
            if (path == null || path.isEmpty()) {
                throw new IOException("Missing required configuration '" + confKey
                        + "' (pass it with -D " + confKey + "=<path>)");
            }
            return path;
        }

        /**
         * Determines which kind of input this task reads. The legacy configuration key is
         * honoured first; because it may be absent under the new (mapreduce) API, the path
         * of the task's file split is used as a fallback.
         */
        private void resolveSource(Context context) {
            String inputPath = context.getConfiguration().get("map.input.file");
            if (inputPath == null && context.getInputSplit() instanceof FileSplit) {
                inputPath = ((FileSplit) context.getInputSplit()).getPath().toString();
            }
            if (inputPath == null) {
                inputPath = "";
            }
            dspInput = inputPath.contains("/etl_dsp_request_daily/");
            adnInput = !dspInput && inputPath.contains("/etl_adn_sdk_request_daily/");
            sourceResolved = true;
        }
    }

    /**
     * Reads every file directly under {@code path} as tab-separated text and builds a map
     * from {@code arr[id]} to {@code "<url>\t<category>"}.
     * <p>
     * When column 2 contains the text "name" and parses as a non-empty JSON array of objects,
     * the value is column 5 plus the "name" of the first object. Otherwise the value is
     * {@code arr[url] + "\t" + arr[category]}. Lines with too few columns are counted and skipped.
     * <p>
     * NOTE(review): the JSON case uses fixed columns 2 and 5, which coincide with the
     * {@code category}/{@code url} arguments of the Android call but not of the iOS call -
     * confirm whether that is intended.
     *
     * @param path     directory (or file) holding the dictionary
     * @param id       column index of the package identifier used as map key
     * @param url      column index of the URL
     * @param category column index of the category
     * @param context  task context supplying the configuration and counters
     * @return map from package identifier to {@code url \t category}
     * @throws IOException if the location cannot be listed or a file cannot be read
     */
    public static Map<String, String> getAppUrl(String path, int id, int url, int category, Mapper.Context context)
            throws IOException {
        ObjectMapper objectMapper = new ObjectMapper();
        JavaType listType = objectMapper.getTypeFactory().constructCollectionType(ArrayList.class, Map.class);
        Map<String, String> packageMap = Maps.newHashMap();
        // FileSystem.get normally hands out a cached, shared instance, so it is not closed here.
        FileSystem fileSystem = FileSystem.get(URI.create(path), context.getConfiguration());
        FileStatus[] statuses = fileSystem.listStatus(new Path(path));
        int requiredColumns = Math.max(id, Math.max(url, category)) + 1;

        for (FileStatus status : statuses) {
            if (status.isDirectory()) {
                // Only plain files can be opened; nested directories are ignored.
                continue;
            }
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(fileSystem.open(status.getPath()), "UTF-8"));
            try {
                String readLine;
                while ((readLine = reader.readLine()) != null) {
                    String[] arr = readLine.split("\t", -1);
                    String urlAndCategory = null;

                    if (arr.length > id && arr.length > JSON_URL_COLUMN
                            && arr[JSON_CATEGORY_COLUMN].contains("name")) {
                        // Convert the JSON array into a list and take the first category name.
                        String name = firstCategoryName(objectMapper, listType, arr[JSON_CATEGORY_COLUMN]);
                        if (name != null) {
                            urlAndCategory = arr[JSON_URL_COLUMN] + "\t" + name;
                        }
                    }
                    if (urlAndCategory == null) {
                        if (arr.length < requiredColumns) {
                            // Blank or truncated line: skip it instead of failing the whole task.
                            context.getCounter(COUNTER_GROUP, "dict malformed line").increment(1L);
                            continue;
                        }
                        urlAndCategory = arr[url] + "\t" + arr[category];
                    }
                    packageMap.put(arr[id], urlAndCategory);
                }
            } finally {
                reader.close();
            }
        }
        return packageMap;
    }

    /**
     * Returns the "name" of the first object in a JSON array, or {@code null} when the text
     * is not such an array, the array is empty, or the first object has no "name".
     */
    private static String firstCategoryName(ObjectMapper objectMapper, JavaType listType, String json) {
        try {
            List<Map<String, Object>> categories = objectMapper.readValue(json, listType);
            if (categories == null || categories.isEmpty() || categories.get(0) == null) {
                return null;
            }
            Object name = categories.get(0).get("name");
            return name == null ? null : name.toString();
        } catch (IOException e) {
            // Column merely contained the text "name" without being valid JSON.
            return null;
        }
    }
}
