package mobvista.dmp.datasource.dsp.mapreduce;

import com.google.common.collect.Sets;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import mobvista.dmp.common.CommonMapReduce;
import mobvista.dmp.common.CommonMapper;
import mobvista.dmp.common.CommonReducer;
import mobvista.dmp.util.MRUtils;
import mobvista.prd.datasource.util.GsonUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.codehaus.jackson.map.ObjectMapper;

import java.io.IOException;
import java.util.Set;
import java.util.regex.Pattern;

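/**
 * author: houying
 * date  : 17-2-17
 * desc  : MapReduce job over raw DSP request logs. The mapper keeps lines that carry a
 *         well-formed IDFA (ios) or GAID (android) plus an acceptable package name, and the
 *         reducer writes one output line per kept request.
 * output: device_id, device_type, platform, country_code, ip, gender, birthday, maker, model, os_version, package_list, androidIds, datetime, geoInfo, longitude, latitude
 */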
public class MdsDspRequestDailyMR extends CommonMapReduce {

    /**
     * Columns of a DSP request log line that this job reads.
     *
     * <p>Each constant pairs a logical column name with the zero-based index of that
     * column in the array produced by splitting a log line (the mapper reads
     * {@code array[field.getIdx()]}).
     */
    public enum Fields {
        IDFA("idfa", 37),
        GAID("gaid", 34),
        PKG_NAME("package_name", 20),
        PLATFORM("platform", 29),
        UPDATE_TIME("time", 0),
        IP("ip", 26),
        MAKER("maker", 27),
        MODEL("model", 28),
        OS_VERSION("os_version", 30),
        COUNTRY_CODE("country_code", 33),
        BIRTHDAY("birthday", 39),
        GENDER("gender", 40),
        EXT_ID("ext_id", 15),
        JSON_MSG("json_msg", 6);

        // Both attributes are fixed at construction; final makes the constants truly immutable.
        private final String name;
        private final int idx;

        Fields(String name, int idx) {
            this.name = name;
            this.idx = idx;
        }

        /**
         * @return the logical column name of this field
         */
        public String getName() {
            return name;
        }

        /**
         * @return the zero-based position of this field in a split log line
         */
        public int getIdx() {
            return idx;
        }
    }

    /**
     * Turns one raw DSP request log line into a device record.
     *
     * <p>A line is kept only when all of the following hold:
     * <ul>
     *   <li>it splits into at least {@link #MIN_FIELD_COUNT} columns;</li>
     *   <li>platform is {@code ios} with an IDFA, or {@code android} with a GAID, and the
     *       package name passes {@link #checkPkgName(String, String)};</li>
     *   <li>the device id has the 8-4-4-4-12 hexadecimal layout;</li>
     *   <li>the comma-separated ext_id column has more than {@link #ANDROID_ID_POS} entries.</li>
     * </ul>
     *
     * <p>Output key: {@code deviceId, deviceType}. Output value (14 fields): platform,
     * country_code, ip, gender, birthday, maker, model, os_version, package name, androidId,
     * time, geoInfo, longitude, latitude.
     */
    public static class MdsDspRequestDailyMapper extends CommonMapper {
        /** Minimum number of columns a log line must have to be processed. */
        private static final int MIN_FIELD_COUNT = 52;
        /** Index inside the split ext_id column whose value is emitted as androidId. */
        private static final int ANDROID_ID_POS = 5;

        // Patterns are immutable and thread-safe, so they are compiled once per JVM.
        private static final Pattern IOS_ID_PREFIXED_PKG_PTN = Pattern.compile("^id\\d+$");
        private static final Pattern IOS_PKG_PTN = Pattern.compile("^\\d+$");
        // NOTE(review): this rejects package names containing '_' — confirm that is intended.
        private static final Pattern ADR_PKG_PTN = Pattern.compile("^[0-9a-zA-Z\\.]+$");
        private static final Pattern DEVICE_ID_PTN =
                Pattern.compile("^[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}$");
        private static final Pattern ID_SPLIT_PTN = Pattern.compile(",");

        /**
         * Validates a single log line and, if it passes every filter, writes one record.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one raw DSP request log line
         * @param context task context used for counters and output
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] array = MRUtils.SPLITTER.split(value.toString());
            if (array.length < MIN_FIELD_COUNT) {
                CommonMapReduce.setMetrics(context, "DMP", "dsp_log_fields_num_error", 1);
                return;
            }
            String platform = array[Fields.PLATFORM.getIdx()];
            String packageName = array[Fields.PKG_NAME.getIdx()];
            String idfa = array[Fields.IDFA.getIdx()];
            String gaid = array[Fields.GAID.getIdx()];
            String jsonMsg = array[Fields.JSON_MSG.getIdx()];
            boolean isIos = "ios".equals(platform);
            boolean isAndroid = "android".equals(platform);

            // Strip the leading "id" from iOS package names of the form id<digits>.
            if (isIos && IOS_ID_PREFIXED_PKG_PTN.matcher(packageName).matches()) {
                packageName = packageName.substring(2);
            }

            String deviceId;
            String deviceType;
            if (isIos && idfa.length() > 4 && checkPkgName("ios", packageName)) {
                deviceId = idfa;
                deviceType = "idfa";
            } else if (isAndroid && gaid.length() > 4 && checkPkgName("adr", packageName)) {
                deviceId = gaid;
                deviceType = "gaid";
            } else {
                return;
            }
            if (!DEVICE_ID_PTN.matcher(deviceId).matches()) {
                setMetrics(context, "DMP", "device_id_illegal_format", 1);
                return;
            }

            // limit -1 keeps trailing empty entries so positions stay stable.
            String[] ids = ID_SPLIT_PTN.split(array[Fields.EXT_ID.getIdx()], -1);
            if (ids.length <= ANDROID_ID_POS) {
                return;
            }
            String androidId = ids[ANDROID_ID_POS];

            // Extract the geo attributes from the JSON message, when present.
            String geoInfo = null;
            String longitude = null;
            String latitude = null;
            if (jsonMsg.startsWith("{")) {
                JsonObject json = GsonUtil.String2JsonObject(jsonMsg);
                JsonElement deviceElement = json == null ? null : json.get("device");
                // isJsonObject() guards getAsJsonObject(), which throws on arrays/primitives.
                if (deviceElement != null && deviceElement.isJsonObject()) {
                    JsonElement geoElement = deviceElement.getAsJsonObject().get("geo");
                    if (geoElement != null && geoElement.isJsonObject()) {
                        geoInfo = geoElement.toString();
                        JsonObject geoJson = geoElement.getAsJsonObject();
                        JsonElement lonElement = geoJson.get("lon");
                        if (lonElement != null && !lonElement.isJsonNull()) {
                            longitude = lonElement.toString();
                        }
                        JsonElement latElement = geoJson.get("lat");
                        if (latElement != null && !latElement.isJsonNull()) {
                            latitude = latElement.toString();
                        }
                    }
                }
            }

            // NOTE(review): geoInfo/longitude/latitude may be null here; this relies on
            // MRUtils.JOINER tolerating null arguments — confirm.
            outKey.set(MRUtils.JOINER.join(deviceId, deviceType));
            outValue.set(MRUtils.JOINER.join(
                    platform,
                    array[Fields.COUNTRY_CODE.getIdx()],
                    array[Fields.IP.getIdx()],
                    array[Fields.GENDER.getIdx()],
                    array[Fields.BIRTHDAY.getIdx()],
                    array[Fields.MAKER.getIdx()],
                    array[Fields.MODEL.getIdx()],
                    array[Fields.OS_VERSION.getIdx()],
                    packageName,   //8 - normalized name (iOS "id" prefix removed)
                    androidId,
                    array[Fields.UPDATE_TIME.getIdx()], //10
                    geoInfo,
                    longitude,
                    latitude
            )); // size = 14
            context.write(outKey, outValue);
        }

        /**
         * Checks whether a package name is acceptable for the given platform.
         *
         * @param platform {@code "ios"} or {@code "adr"}; any other value yields {@code false}
         * @param pkg      package name to check
         * @return {@code true} if the name matches the pattern(s) allowed for the platform
         */
        private boolean checkPkgName(String platform, String pkg) {
            switch (platform) {
            case "ios":
                // iOS accepts numeric store ids as well as bundle-id style names.
                return IOS_PKG_PTN.matcher(pkg).matches() || ADR_PKG_PTN.matcher(pkg).matches();
            case "adr":
                return ADR_PKG_PTN.matcher(pkg).matches();
            default:
                return false;
            }
        }
    }

    /**
     * Writes every value of a group as one output line made of the group key joined with
     * the value; nothing is aggregated or de-duplicated.
     */
    public static class MdsDspRequestDailyReducer extends CommonReducer {
        // Reused across calls to avoid allocating a Text per output line.
        private Text outputLine = new Text();

        /**
         * Emits {@code key + value} for each value, with a {@link NullWritable} output value.
         *
         * @param key     group key produced by the mapper
         * @param values  all mapper values that share {@code key}
         * @param context task context the lines are written to
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            final NullWritable nothing = NullWritable.get();
            for (Text record : values) {
                // The key is stringified per value on purpose: the framework may reuse the key object.
                String keyPart = key.toString();
                String valuePart = record.toString();
                outputLine.set(MRUtils.JOINER.join(keyPart, valuePart));
                context.write(outputLine, nothing);
            }
        }
    }

    /**
     * Points the job at its output directory and turns on gzip-compressed output.
     *
     * @param job  job being configured
     * @param args command-line arguments; {@code args[1]} is the output path
     */
    @Override
    protected void setOutputPath(Job job, String[] args) throws IOException {
        final Path outputDir = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputDir);
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    }

    /**
     * Registers the job's single input path.
     *
     * @param job  job being configured
     * @param args command-line arguments; {@code args[0]} is the input path
     */
    @Override
    protected void setInputPath(Job job, String[] args) throws IOException {
        final Path inputDir = new Path(args[0]);
        FileInputFormat.addInputPath(job, inputDir);
    }

    /**
     * Creates the job definition by handing all arguments to the parent class.
     *
     * @param name         job name
     * @param mapperClass  mapper implementation to run
     * @param reducerClass reducer implementation to run
     */
    public MdsDspRequestDailyMR(
            String name,
            Class<? extends Mapper> mapperClass,
            Class<? extends Reducer> reducerClass) {
        super(name, mapperClass, reducerClass);
    }

    /**
     * Command-line entry point: builds the job with this file's mapper and reducer and
     * passes it, together with the arguments, to {@code start}.
     *
     * @param args command-line arguments; index 0 is read as the input path and index 1 as
     *             the output path by this class
     */
    public static void main(String[] args) throws Exception {
        MdsDspRequestDailyMR job = new MdsDspRequestDailyMR(
                "mds dsp request daily",
                MdsDspRequestDailyMapper.class,
                MdsDspRequestDailyReducer.class);
        start(job, args);
    }
}
