/**
 * 
 */
package mobvista.dmp.datasource.packagelist.mapreduce;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import mobvista.dmp.common.CommonMapReduce;
import mobvista.dmp.datasource.ga.mapreduce.vo.TextPair;
import mobvista.dmp.util.MRUtils;
import mobvista.prd.datasource.util.GsonUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.joda.time.format.DateTimeFormat;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;


/**
 * @author rongpei
 *
 */
public class MergePackageName extends Configured implements Tool {
    public static String regex = "^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$";

	/**
	 * Mapper that turns one input record into one output pair per package entry.
	 *
	 * <p>A record is split with {@code MRUtils.SPLITTER} and must have at least four columns;
	 * shorter records only increment the {@code DMP/column_num_error} counter. The first three
	 * columns are joined into the first component of the output key, with a third column of
	 * {@code "adr"} rewritten to {@code "android"}. The fourth column holds the package entries.
	 *
	 * <p>Each non-empty entry is emitted as the value under a {@link TextPair} key whose second
	 * component tags the entry type:
	 * <ul>
	 *   <li>{@code "1"} - a plain entry (does not start with {@code "[{"}). Entries that match the
	 *       device-id pattern {@link MergePackageName#regex} are not emitted; they increment the
	 *       {@code DMP/devivce_exceptions} counter instead.</li>
	 *   <li>{@code "2"} - an entry starting with {@code "[{"}, passed through unchanged.</li>
	 * </ul>
	 */
	public static class MergePackageNameMap extends Mapper<LongWritable, Text, TextPair, Text> {
		/** Key tag for plain package entries. */
		private static final String TAG_PACKAGE_NAME = "1";
		/** Key tag for entries that start with "[{". */
		private static final String TAG_INSTALL_LIST = "2";

		/**
		 * Device-id pattern, compiled once instead of on every entry. It is built from the value
		 * of {@link MergePackageName#regex} at the time this class is initialized.
		 */
		private static final Pattern DEVICE_ID_PATTERN = Pattern.compile(MergePackageName.regex);

		/**
		 * Delimiter between package entries in the fourth column.
		 *
		 * <p>Earlier code split on the literal {@code "\0x1"}, which Java reads as the three
		 * characters NUL, 'x', '1' (an octal escape followed by "x1") rather than the single
		 * control character U+0001. Both forms are accepted here so that data written with either
		 * delimiter is split correctly.
		 */
		private static final Pattern PACKAGE_DELIMITER = Pattern.compile("\001|\000x1");

		/** Reused output value; safe because context.write serializes it immediately. */
		private final Text outValue = new Text();

		/**
		 * Emits one (key, entry) pair per non-empty package entry of the record.
		 *
		 * @param key     input key supplied by the input format (not used)
		 * @param value   one input line
		 * @param context Hadoop context used for output and counters
		 * @throws IOException          if writing the output fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			String[] splits = MRUtils.SPLITTER.split(value.toString(), -1);
			if (splits.length < 4) {
				context.getCounter("DMP", "column_num_error").increment(1);
				return;
			}

			// Normalize the short platform name so both spellings end up under the same key.
			String platform = "adr".equals(splits[2]) ? "android" : splits[2];
			String keyStr = MRUtils.JOINER.join(splits[0], splits[1], platform);

			for (String pkg : PACKAGE_DELIMITER.split(splits[3])) {
				if (pkg.isEmpty()) {
					continue;
				}
				if (pkg.startsWith("[{")) {
					outValue.set(pkg);
					context.write(new TextPair(keyStr, TAG_INSTALL_LIST), outValue);
				} else if (DEVICE_ID_PATTERN.matcher(pkg).matches()) {
					// A value shaped like a device id in the package column is treated as bad data.
					context.getCounter("DMP", "devivce_exceptions").increment(1);
				} else {
					outValue.set(pkg);
					context.write(new TextPair(keyStr, TAG_PACKAGE_NAME), outValue);
				}
			}
		}
	}


	/**
	 * Reducer that merges all package entries received for one key into a single JSON line.
	 *
	 * <p>Values that do not start with {@code "[{"} (after trimming) are turned into a
	 * {@code JsonVO} built from their first {@code MRUtils.SPLITTER} field and the task date.
	 * Values that start with {@code "[{"} are parsed as a JSON array of {@code JsonVO}; only
	 * elements whose date is not older than the expiry date (task date minus 12 months) are kept.
	 * The collected set is written as {@code <first key component> JOINER <json of the set>}.
	 *
	 * <p>NOTE(review): de-duplication relies on {@code JsonVO.equals/hashCode}, and the order in
	 * which tagged values arrive relies on the {@code TextPair} comparators; neither is visible in
	 * this file - confirm before changing the merge order.
	 */
	public static class MergePackageNameReduce extends Reducer<TextPair, Text, NullWritable, Text> {
		private static final String DATE_FORMAT = "yyyy-MM-dd";
		/** History entries older than this many months before the task date are dropped. */
		private static final int RETENTION_MONTHS = 12;
		/** A key whose install list has more elements than this is dropped entirely. */
		private static final int MAX_INSTALLED_APPS = 1000;

		/**
		 * Device-id pattern, compiled once instead of on every key. It is built from the value
		 * of {@link MergePackageName#regex} at the time this class is initialized.
		 */
		private static final Pattern DEVICE_ID_PATTERN = Pattern.compile(MergePackageName.regex);

		private String date;
		private String expireDate;
		protected  Text output = new Text();

		/**
		 * Reads the task date from the {@code task.date} configuration property and derives the
		 * expiry date from it.
		 *
		 * @param context Hadoop context giving access to the job configuration
		 * @throws IllegalArgumentException if {@code task.date} is missing or is not a valid
		 *                                  {@code yyyy-MM-dd} date
		 */
		@Override
		protected void setup(Context context) throws IOException, InterruptedException {
			this.date = context.getConfiguration().get("task.date");
			if (this.date == null) {
				// Fail with a clear message instead of an opaque error inside the date parser.
				throw new IllegalArgumentException("Missing required configuration property: task.date");
			}
			this.expireDate = DateTimeFormat.forPattern(DATE_FORMAT)
					.parseDateTime(this.date)
					.minusMonths(RETENTION_MONTHS)
					.toString(DATE_FORMAT);
		}

		/**
		 * Merges the values of one key group and writes a single output line for it.
		 *
		 * <p>Nothing is written when the key has fewer than two fields, when its first field is
		 * not a valid device id (counted in {@code DMP/devivce_exceptions}), when an install list
		 * has more than 1000 elements, or when no entry survives the filtering.
		 *
		 * @param key     composite key; only its first component is used here
		 * @param values  package entries emitted by the mapper for this key group
		 * @param context Hadoop context used for output and counters
		 * @throws IOException          if writing the output fails
		 * @throws InterruptedException if the task is interrupted while writing
		 */
		@Override
		protected void reduce(TextPair key, Iterable<Text> values, Context context)
				throws IOException, InterruptedException {
			String[] keySplits = MRUtils.SPLITTER.split(key.getFirst().toString(), -1);
			if (keySplits.length < 2) {
				return;
			}
			if (!DEVICE_ID_PATTERN.matcher(keySplits[0]).matches()) {
				CommonMapReduce.setMetrics(context, "DMP","devivce_exceptions",1);
				return;
			}

			Set<JsonVO> jsonSet = new HashSet<JsonVO>();
			for (Text value : values) {
				String val = value.toString();
				if (!val.trim().startsWith("[{")) {
					// Plain package entry: stamp it with the task date.
					String[] fields = MRUtils.SPLITTER.split(val, -1);
					jsonSet.add(new JsonVO(fields[0], this.date));
					continue;
				}

				// Package install list serialized as a JSON array.
				JsonArray installed = GsonUtil.String2JsonArray(val);
				if (installed.size() > MAX_INSTALLED_APPS) {
					// More than 1000 installed apps: drop the whole key.
					return;
				}
				for (JsonElement element : installed) {
					JsonVO vo = GsonUtil.fromJson(element, JsonVO.class);
					if (vo == null || vo.getDate() == null) {
						// An entry without a date cannot be checked against the expiry date;
						// skip and count it instead of failing the task with a NullPointerException.
						context.getCounter("DMP", "invalid_install_record").increment(1);
						continue;
					}
					// Dates are yyyy-MM-dd strings, so lexicographic order matches date order.
					if (vo.getDate().compareTo(this.expireDate) >= 0) {
						jsonSet.add(vo);
					}
				}
			}

			if (jsonSet.isEmpty()) {
				return;
			}

			output.set(MRUtils.JOINER.join(
					key.getFirst().toString(),
					GsonUtil.toJson(jsonSet)
			));
			context.write(NullWritable.get(), output);
		}
	}
	
	/**
	 * Configures and runs the MergePackageName job and waits for it to finish.
	 *
	 * <p>Output is written gzip-compressed. Map output is partitioned and grouped with
	 * {@code TextPair.FirstPartitioner} and {@code TextPair.FirstComparator}.
	 *
	 * @param args {@code args[0]} historical data input path, {@code args[1]} input path of the
	 *             data updated on the current day, {@code args[2]} output path
	 * @return 0 if the job completed successfully, 1 otherwise
	 * @throws IllegalArgumentException if fewer than three arguments are supplied
	 * @throws Exception                if job setup or execution fails
	 */
	@Override
	public int run(String[] args) throws Exception {
		if (args == null || args.length < 3) {
			// Fail early with a usage hint rather than an ArrayIndexOutOfBoundsException below.
			throw new IllegalArgumentException(
					"Usage: MergePackageName <history input path> <daily input path> <output path>");
		}

		Job job = Job.getInstance(getConf(), "MergePackageName");
		job.setJarByClass(MergePackageName.class);

		job.setMapperClass(MergePackageNameMap.class);
		job.setMapOutputKeyClass(TextPair.class);
		job.setMapOutputValueClass(Text.class);

		job.setReducerClass(MergePackageNameReduce.class);
		job.setOutputKeyClass(NullWritable.class);
		job.setOutputValueClass(Text.class);

		job.setPartitionerClass(TextPair.FirstPartitioner.class);
		job.setGroupingComparatorClass(TextPair.FirstComparator.class);

		FileInputFormat.addInputPath(job, new Path(args[0])); // historical data
		FileInputFormat.addInputPath(job, new Path(args[1])); // data updated on the current day

		FileOutputFormat.setOutputPath(job, new Path(args[2])); // reduce output path
		FileOutputFormat.setCompressOutput(job, true);
		FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

		return job.waitForCompletion(true) ? 0 : 1;
	}

	/**
	 * Command-line entry point: runs the job through {@link ToolRunner} and exits the JVM with
	 * the job's exit code, or -1 if anything is thrown.
	 *
	 * @param args command-line arguments, passed on to {@link ToolRunner}
	 */
	public static void main(String[] args) throws Exception {
		// Default to failure: System.exit runs in the finally block, so any throwable that is not
		// handled below must never be reported as a successful (0) exit.
		int exitCode = -1;
		try {
			exitCode = ToolRunner.run(new Configuration(), new MergePackageName(), args);
		} catch (Throwable t) {
			// Catch Throwable, not just Exception: an Error such as NoClassDefFoundError would
			// otherwise be cut off by System.exit without its stack trace being printed.
			exitCode = -1;
			t.printStackTrace();
		} finally {
			System.exit(exitCode);
		}
	}
}
