/**
 * 
 */
package mobvista.dmp.datasource.packagelist.mapreduce;

import mobvista.dmp.util.MRUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


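/**
 * MapReduce job that merges the historical (full) campaign list with the
 * daily campaign updates, emitting one record per campaign id.
 *
 * @author rongpei
 * 2017-04-27
 */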
public class MergeCampaignList extends Configured implements Tool{


	/**
	 * Mapper that normalizes campaign rows from two differently shaped inputs
	 * into {@code campaignId -> (platform, packageName, appName, updateDate)}.
	 *
	 * <ul>
	 *   <li>Daily rows (input path contains {@code /campaign_list/}) are read as
	 *       {@code campaignId, packageName, platformCode, appName}, where the
	 *       platform code {@code "1"} maps to {@code "adr"} and {@code "2"} to
	 *       {@code "ios"}. They are emitted with the marker {@code "new"} in
	 *       the update-date position.</li>
	 *   <li>All other rows are treated as the full data set and read as
	 *       {@code campaignId, platform, packageName, appName, updateDate}.</li>
	 * </ul>
	 *
	 * Rows with too few fields, an unknown platform, or an unusable package
	 * name are skipped.
	 */
	public static class MergeCampaignListMap extends Mapper<LongWritable, Text, Text, Text> {
		private static final String ios = "ios";
		private static final String android = "adr";

		/** Value written in the update-date position for rows from the daily input. */
		private static final String NEW_RECORD = "new";

		/** Path fragment that identifies the daily campaign list input. */
		private static final String DAILY_PATH_MARKER = "/campaign_list/";

		/** A run of at least five digits; compiled once instead of on every call. */
		private static final Pattern IOS_APP_ID = Pattern.compile("\\d{5,}");

		private Text outKey = new Text();
		private Text outValue = new Text();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			String[] fields = MRUtils.SPLITTER.split(value.toString(), -1);
			if (fields.length < 4) {
				return;
			}

			String campaignId = fields[0];
			String appName = fields[3];

			String platform;
			String rawPackageName;
			String updateDate;
			if (inputLocation(context).contains(DAILY_PATH_MARKER)) {
				rawPackageName = fields[1];
				// Translate the numeric code first so that getPackageName applies
				// the Android rules to Android rows (it compares against "adr").
				platform = toPlatformName(fields[2]);
				updateDate = NEW_RECORD;
			} else { // full data set
				if (fields.length < 5) {
					// The update date lives in the fifth column; skip short rows
					// instead of failing the task with an index error.
					return;
				}
				platform = fields[1];
				rawPackageName = fields[2];
				updateDate = fields[4];
				if (!ios.equals(platform) && !android.equals(platform)) {
					platform = null;
				}
			}
			if (platform == null) {
				return; // unknown platform: nothing to emit
			}

			String packageName = getPackageName(rawPackageName, platform);
			if (StringUtils.isNotEmpty(packageName)) {
				outKey.set(campaignId);
				outValue.set(MRUtils.JOINER.join(platform, packageName, appName, updateDate));
				context.write(outKey, outValue);
			}
		}

		/**
		 * Returns a string describing where the current record comes from.
		 *
		 * Uses the {@code map.input.file} configuration value when it is set;
		 * otherwise falls back to the string form of the current input split so
		 * that a missing key does not cause a NullPointerException.
		 */
		private String inputLocation(Context context) {
			String location = context.getConfiguration().get("map.input.file");
			if (location == null) {
				// NOTE(review): relies on the split's toString() containing the file
				// path (FileSplit renders as path:start+length) - confirm for the
				// input format in use.
				location = String.valueOf(context.getInputSplit());
			}
			return location;
		}

		/**
		 * Maps the numeric platform code of the daily input to the platform name.
		 *
		 * @param code platform code from the daily input
		 * @return {@code "adr"} for {@code "1"}, {@code "ios"} for {@code "2"},
		 *         otherwise {@code null}
		 */
		private String toPlatformName(String code) {
			if ("1".equals(code)) {
				return android;
			}
			if ("2".equals(code)) {
				return ios;
			}
			return null;
		}

		/**
		 * Cleans a raw package name according to the platform.
		 *
		 * @param packageName raw package name; may be {@code null}
		 * @param platform    {@code "adr"} selects the Android rule; any other
		 *                    value selects the digit-extraction rule
		 * @return for Android, the package name with every {@code '='} removed;
		 *         otherwise the first run of five or more digits in the name.
		 *         Returns {@code null} when the name is {@code null}, equals
		 *         {@code "/"}, starts with {@code "."}, or contains no such
		 *         digit run.
		 */
		public String getPackageName(String packageName, String platform) {
			if (packageName == null || "/".equals(packageName) || packageName.startsWith(".")) {
				return null;
			}
			if (android.equals(platform)) {
				return packageName.replace("=", "");
			}
			Matcher matcher = IOS_APP_ID.matcher(packageName);
			return matcher.find() ? matcher.group(0) : null;
		}

	}

	/**
	 * Reducer that emits a single record per campaign id.
	 *
	 * Values are scanned in iteration order. The first value whose fourth
	 * field is {@code "new"} wins and is written with the configured
	 * {@code task.date} as its update date; if no such value exists, the last
	 * value seen is written unchanged.
	 */
	public static class MergeCampaignListReduce extends Reducer<Text, Text, Text, Text> {
		private String date;
		private Text outValue = new Text();

		/** Reads the job's {@code task.date} setting once per task. */
		@Override
		protected void setup(Context context) throws IOException, InterruptedException {
			date = context.getConfiguration().get("task.date");
		}

		@Override
		protected void reduce(Text key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
			for (Text record : value) {
				// Fields: platform, packageName, appName, updateDate
				String[] fields = MRUtils.SPLITTER.split(record.toString(), -1);
				boolean fromDailyInput = fields[3].equals("new");
				String updateDate = fromDailyInput ? date : fields[3];
				outValue.set(MRUtils.JOINER.join(fields[0], fields[1], fields[2], updateDate));
				if (fromDailyInput) {
					// A daily record takes precedence; stop looking.
					break;
				}
			}
			context.write(key, outValue);
		}
	}


	/**
	 * Configures and runs the merge job.
	 *
	 * @param args {@code args[0]} historical data input path,
	 *             {@code args[1]} daily update input path,
	 *             {@code args[2]} output path (deleted first if it exists)
	 * @return {@code 0} when the job succeeds, {@code 1} when it fails,
	 *         {@code 2} when fewer than three arguments are supplied
	 * @throws Exception if the file system or job submission fails
	 */
	@Override
	public int run(String[] args) throws Exception {
		if (args == null || args.length < 3) {
			System.err.println("Usage: MergeCampaignList <historyInput> <dailyInput> <output>");
			return 2;
		}

		Path historyPath = new Path(args[0]);
		Path dailyPath = new Path(args[1]);
		Path outputPath = new Path(args[2]);

		// Remove any previous output so the job can write to the same location.
		FileSystem fileSystem = outputPath.getFileSystem(getConf());
		if (fileSystem.exists(outputPath)) {
			fileSystem.delete(outputPath, true);
		}

		Job job = Job.getInstance(getConf(), "MergeCampaignList");
		job.setJarByClass(MergeCampaignList.class);
		FileInputFormat.addInputPath(job, historyPath); // historical data
		FileInputFormat.addInputPath(job, dailyPath); // daily update data
		FileOutputFormat.setOutputPath(job, outputPath); // reduce output location

		// The default output format is used; output files are gzip-compressed.
		FileOutputFormat.setCompressOutput(job, true);
		FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
		job.setMapperClass(MergeCampaignListMap.class);
		job.setReducerClass(MergeCampaignListReduce.class);

		// Map output key/value types
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		// Reduce output key/value types
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		return job.waitForCompletion(true) ? 0 : 1;
	}

	/**
	 * Command-line entry point: runs the job through {@link ToolRunner} and
	 * exits the JVM with the job's return code.
	 */
	public static void main(String[] args) throws Exception {
		Configuration configuration = new Configuration();
		int exitCode = ToolRunner.run(configuration, new MergeCampaignList(), args);
		System.exit(exitCode);
	}

}
