package mobvista.prd.datasource.tag.mapreduce.map;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import mobvista.prd.datasource.util.GsonUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.regex.Pattern;

/**
 * Mapper that emits one count record per interest tag found in an input line.
 *
 * <p>Each input line is split on tab characters. When the line has at least four
 * fields, the fourth field is parsed with {@code GsonUtil.String2JsonArray}; every
 * element of that array is read as a JSON object whose {@code "tag"} member is an
 * array of tag objects. For each tag object the members {@code "1"} and {@code "2"}
 * are read and the pair {@code <"1">\t<"2">} is written as the output key with the
 * value {@code 1}.
 *
 * <p>Instances reuse their output key, output value and string builder between
 * calls to {@link #map}, so a single instance must not be used concurrently.
 *
 * Created by fl on 2017/5/11.
 */
public class InterestTagMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    /** Separator used both to split the input line and to join the two tag levels. */
    private static final String DATA_SPLIT = "\t";

    /** Compiled once for the whole class; {@link Pattern} is immutable and safe to share. */
    private static final Pattern SPLIT_PATTERN = Pattern.compile(DATA_SPLIT);

    /** Zero-based index of the input field that holds the JSON array. */
    private static final int TAG_FIELD_INDEX = 3;

    /** JSON member names read from each tag object. */
    private static final String FIRST_LEVEL_KEY = "1";
    private static final String SECOND_LEVEL_KEY = "2";

    // Reused across map() calls to avoid allocating per record.
    private final Text outKey = new Text();
    private final LongWritable outVal = new LongWritable(1);
    private final StringBuilder builder = new StringBuilder();

    /**
     * Emits {@code (firstLevel \t secondLevel, 1)} for every tag object in the record.
     *
     * <p>Lines with fewer than four tab-separated fields produce no output.
     *
     * @param key     input key supplied by the framework; only used in error messages
     * @param value   one input line
     * @param context sink for the emitted key/value pairs
     * @throws IOException          if writing to {@code context} fails
     * @throws InterruptedException if writing to {@code context} is interrupted
     * @throws RuntimeException     if the record cannot be parsed (for example malformed
     *                              JSON or a missing {@code "tag"} member); the original
     *                              failure is preserved as the cause
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        try {
            // Limit -1 keeps trailing empty fields so the field count is not under-reported.
            String[] valSplits = SPLIT_PATTERN.split(value.toString(), -1);
            if (valSplits.length <= TAG_FIELD_INDEX) {
                return;
            }

            JsonArray array = GsonUtil.String2JsonArray(valSplits[TAG_FIELD_INDEX]);
            for (JsonElement element : array) {
                JsonObject obj = element.getAsJsonObject();
                JsonArray tagArray = obj.get("tag").getAsJsonArray();
                for (JsonElement tagElement : tagArray) {
                    JsonObject tagObj = tagElement.getAsJsonObject();
                    String firstLevel = levelOf(tagObj, FIRST_LEVEL_KEY);
                    String secondLevel = levelOf(tagObj, SECOND_LEVEL_KEY);

                    builder.setLength(0);
                    builder.append(firstLevel).append(DATA_SPLIT).append(secondLevel);

                    outKey.set(builder.toString());
                    context.write(outKey, outVal);
                }
            }
        } catch (IOException e) {
            // Already part of the method contract: propagate unchanged instead of wrapping.
            throw e;
        } catch (InterruptedException e) {
            // Likewise declared; wrapping it would hide the interruption from the caller.
            throw e;
        } catch (Exception e) {
            // Parsing problems still fail the call, but now say which record was involved.
            throw new RuntimeException("Failed to map record with input key " + key, e);
        }
    }

    /**
     * Reads one tag level from a tag object.
     *
     * @param tagObj tag object to read from
     * @param level  member name to read
     * @return the member as a string, or an empty string when the member is absent
     *         or is an explicit JSON {@code null}
     */
    private static String levelOf(JsonObject tagObj, String level) {
        JsonElement levelElement = tagObj.get(level);
        if (levelElement == null || levelElement.isJsonNull()) {
            return "";
        }
        return levelElement.getAsString();
    }
}
