package mobvista.prd.datasource.country.interest.mapreduce;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import mobvista.dmp.util.MRUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import java.io.IOException;
import java.util.List;
import java.util.Set;

/**
 * Created by Administrator on 2017/5/12 0012.
 * desc : adds the country to each day's M-system devices
 */
public class MergeMInterestDailyMR {
    /**
     * Configures and submits the MergeMInterestDailyMR job, then exits the JVM with the
     * job's status (0 on success, 1 on failure).
     *
     * <p>After the generic Hadoop options have been stripped, four positional arguments
     * are required:
     * <ol>
     *   <li>first input path</li>
     *   <li>second input path</li>
     *   <li>output path</li>
     *   <li>number of reduce tasks (integer)</li>
     * </ol>
     * Both input paths are simply added as job inputs; the mapper tells the two sources
     * apart by looking for {@code /etl_adn_sdk_request_daily/} in the path of the file it
     * is reading, so the order of the two input arguments does not matter.
     *
     * <p>If the arguments are missing or the reducer count is not an integer, a usage
     * message is printed to stderr and the JVM exits with status 2 instead of failing
     * with an uninformative exception.
     *
     * @param args command-line arguments, optionally starting with generic Hadoop options
     * @throws IOException            if the job cannot be set up or submitted
     * @throws ClassNotFoundException if a job class cannot be loaded at submission time
     * @throws InterruptedException   if the wait for job completion is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        String usage = "Usage: MergeMInterestDailyMR [generic options] "
                + "<inputPath1> <inputPath2> <outputPath> <numReduceTasks>";
        if (otherArgs.length < 4) {
            System.err.println(usage);
            System.exit(2);
            return; // not reached; keeps the argument accesses below provably safe
        }

        int numReduceTasks;
        try {
            numReduceTasks = Integer.parseInt(otherArgs[3]);
        } catch (NumberFormatException e) {
            System.err.println("numReduceTasks must be an integer but was '" + otherArgs[3] + "'");
            System.err.println(usage);
            System.exit(2);
            return; // not reached; satisfies definite assignment of numReduceTasks
        }

        Job job = Job.getInstance(conf, "MergeMInterestDailyMR");
        job.setJarByClass(MergeMInterestDailyMR.class);

        job.setMapperClass(MergeMInterestDailyMapper.class);
        job.setReducerClass(MergeMInterestDailyReducer.class);
        job.setNumReduceTasks(numReduceTasks);

        // Map output and final output share the same key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Final output is written gzip-compressed.
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
    /**
     * Map side of the join between the daily request records and the records that carry
     * the country.
     *
     * <p>Every input row is split with {@code MRUtils.SPLITTER}. The output key is always
     * columns 0 and 1 (device_id, device_type) joined with {@code MRUtils.JOINER}. The
     * output value depends on which input the row comes from:
     * <ul>
     *   <li>files whose path contains {@code /etl_adn_sdk_request_daily/}: column 4
     *       (pkg_name) joined with the literal tag {@code "daily"};</li>
     *   <li>any other file: column 9 (country) as is.</li>
     * </ul>
     *
     * <p>Rows with too few columns are skipped and counted in the
     * {@code malformed_record} counter. If the path of the input file cannot be
     * determined, rows are skipped and counted in {@code unknown_input_path}. Failures
     * while writing output are NOT swallowed; they propagate and fail the task.
     */
    public static class MergeMInterestDailyMapper extends Mapper <LongWritable, Text, Text, Text> {
        /** Path fragment that identifies the daily request input. */
        private static final String DAILY_PATH_MARKER = "/etl_adn_sdk_request_daily/";
        /** Counter group used for the skipped-record counters. */
        private static final String COUNTER_GROUP = "MergeMInterestDailyMapper";
        /** Column holding pkg_name in the daily request input. */
        private static final int DAILY_PKG_INDEX = 4;
        /** Column holding the country in the other input. */
        private static final int COUNTRY_INDEX = 9;

        Text outKey = new Text();
        Text outValue = new Text();

        /** False when the path of the file being read could not be determined. */
        private boolean sourceKnown;
        /** True when this task reads the daily request input. */
        private boolean dailyInput;

        /**
         * Determines once per task which of the two inputs is being read, instead of
         * repeating the path lookup for every record.
         */
        protected void setup(Context context) throws IOException, InterruptedException {
            String filePath = resolveInputPath(context);
            sourceKnown = filePath != null;
            dailyInput = sourceKnown && filePath.contains(DAILY_PATH_MARKER);
        }

        /**
         * Emits (device_id + device_type, tagged pkg_name) for daily rows and
         * (device_id + device_type, country) for rows of the other input.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one input row
         * @param context task context used for output and counters
         */
        public void map (LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            if (!sourceKnown) {
                // Without the path the row cannot be attributed to either input.
                context.getCounter(COUNTER_GROUP, "unknown_input_path").increment(1);
                return;
            }

            String[] arr = MRUtils.SPLITTER.split(value.toString(), -1);
            int valueIndex = dailyInput ? DAILY_PKG_INDEX : COUNTRY_INDEX;
            if (arr.length <= valueIndex) {
                // Row is too short to contain the needed column; skip it but keep a count.
                context.getCounter(COUNTER_GROUP, "malformed_record").increment(1);
                return;
            }

            outKey.set(MRUtils.JOINER.join(arr[0], arr[1]));//device_id,device_type
            if (dailyInput) {
                outValue.set(MRUtils.JOINER.join(arr[DAILY_PKG_INDEX], "daily"));//pkg_name
            } else {
                outValue.set(arr[COUNTRY_INDEX]);//country
            }
            context.write(outKey, outValue);
        }

        /**
         * Returns the path of the file this task reads, or null if it cannot be found.
         * The "map.input.file" configuration value is preferred when present; otherwise
         * the path is taken from the task's input split when that split is a FileSplit.
         */
        private String resolveInputPath(Context context) {
            String configured = context.getConfiguration().get("map.input.file");
            if (configured != null) {
                return configured;
            }
            if (context.getInputSplit() instanceof FileSplit) {
                return ((FileSplit) context.getInputSplit()).getPath().toString();
            }
            return null;
        }
    }
    /**
     * Reduce side of the join: for one key, pairs the country with every distinct package
     * name seen among the values tagged "daily".
     *
     * <p>A value that contains the text "daily" is treated as a package record, and the
     * part before its first tab character is taken as the package name. Any other value
     * is treated as the country; if several such values arrive for a key, the one
     * iterated last is kept. Nothing is written unless the key has at least one package
     * record and a non-empty country.
     */
    public static class MergeMInterestDailyReducer extends Reducer <Text, Text, Text, Text> {
        Text outKey = new Text();
        Text outValue = new Text();

        /**
         * Writes one (key, country joined with pkg) record per distinct package name.
         *
         * @param key     join key produced by the mapper
         * @param values  all values emitted for that key
         * @param context task context used for output
         */
        public void reduce (Text key,  Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String country = "";
            Set<String> packages = Sets.newHashSet();

            for (Text val : values) {
                String record = val.toString();
                if (!record.contains("daily")) {
                    country = record;
                    continue;
                }
                // Package name is everything before the first tab (whole value if no tab).
                int tabAt = record.indexOf('\t');
                packages.add(tabAt < 0 ? record : record.substring(0, tabAt));
            }

            // Both sides of the join must be present for this key.
            if (packages.isEmpty() || country.isEmpty()) {
                return;
            }

            for (String pkg : packages) {
                outValue.set(MRUtils.JOINER.join(country, pkg));
                context.write(key, outValue);//device_id,device_type,country,pkg
            }
        }
    }
}
