package mobvista.prd.datasource.tag.mapreduce.map;

import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import mobvista.prd.datasource.util.GsonUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Mapper that emits {@code (age bracket, 1)} for every accepted input record.
 *
 * <p>Each input line is split on tab characters; the fourth column (index 3) is parsed as a
 * JSON object whose {@code age_and_proportion} member maps an age-bracket name to a
 * probability. The bracket with the largest probability is taken as the record's age bracket.
 * If the largest probability is below 0.5, the record is emitted under the key
 * {@code "unknow"} instead.
 *
 * <p>Example of the JSON column:
 * <pre>
 * {"age_and_source":{"null":"null"},"age_and_proportion":{"60+":0.02093,"25-44":0.628605,"18-24":0.242791,"0-17":0.03,"45-59":0.077674}}
 * </pre>
 *
 * <p>Records with fewer than four columns are skipped. Records whose JSON has no usable
 * {@code age_and_proportion} object are skipped and counted in the
 * {@code AgeTotalMapper / MISSING_AGE_PROPORTION} job counter.
 *
 * Created by fl on 2017/5/12.
 */
public class AgeTotalMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    /** Column separator of the input lines. */
    private static final String DATA_SPLIT = "\t";
    /** Compiled once for the whole JVM; {@link Pattern} is immutable and thread-safe. */
    private static final Pattern SPLIT_PATTERN = Pattern.compile(DATA_SPLIT);
    /** Zero-based index of the column that carries the JSON payload. */
    private static final int JSON_COLUMN = 3;
    /** Name of the JSON member that maps age bracket to probability. */
    private static final String AGE_FIELD = "age_and_proportion";
    /** A bracket is only accepted when its probability is at least this value. */
    private static final double MIN_PROBABILITY = 0.5d;
    /**
     * Output key used when no bracket reaches {@link #MIN_PROBABILITY}.
     * NOTE(review): spelled "unknow" (not "unknown"); kept as-is because downstream
     * consumers may match on this exact key — confirm before changing.
     */
    private static final String UNKNOWN_AGE = "unknow";
    /** Counter group for records this mapper has to skip. */
    private static final String COUNTER_GROUP = "AgeTotalMapper";

    // Reused across map() calls to avoid allocating a new Writable per record.
    private final Text outKey = new Text();
    private final LongWritable outVal = new LongWritable(1);

    /**
     * Parses one input line and writes {@code (age bracket, 1)} to the context.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one tab-separated input line
     * @param context Hadoop context the result is written to
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // limit -1 keeps trailing empty columns so the column count is not under-reported
        String[] columns = SPLIT_PATTERN.split(value.toString(), -1);
        if (columns.length <= JSON_COLUMN) {
            return;
        }

        // NOTE(review): GsonUtil.String2JsonObject is not visible here; if it throws on
        // malformed JSON, that exception still propagates — confirm the helper's contract.
        JsonObject json = GsonUtil.String2JsonObject(columns[JSON_COLUMN]);
        JsonElement ageElement = json == null ? null : json.get(AGE_FIELD);
        if (ageElement == null || !ageElement.isJsonObject()) {
            // Missing / null / non-object member: skip the record instead of failing the task.
            context.getCounter(COUNTER_GROUP, "MISSING_AGE_PROPORTION").increment(1);
            return;
        }

        outKey.set(dominantAge(ageElement.getAsJsonObject()));
        context.write(outKey, outVal);
    }

    /**
     * Returns the key with the largest probability, or {@code "unknow"} when that probability
     * is below 0.5. Entries whose value cannot be read as a number are ignored. On ties the
     * first entry in iteration order wins.
     */
    private static String dominantAge(JsonObject ageJson) {
        String age = UNKNOWN_AGE;
        double max = 0.0;
        for (Map.Entry<String, JsonElement> entry : ageJson.entrySet()) {
            JsonElement raw = entry.getValue();
            if (!raw.isJsonPrimitive()) {
                // JSON null, arrays and nested objects carry no probability
                continue;
            }
            double probability;
            try {
                probability = raw.getAsDouble();
            } catch (NumberFormatException ignored) {
                // e.g. {"null":"null"} — a non-numeric string must not abort the record
                continue;
            }
            if (probability > max) {
                max = probability;
                age = entry.getKey();
            }
        }
        return max < MIN_PROBABILITY ? UNKNOWN_AGE : age;
    }
}
