package mobvista.prd.datasource.test;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Files;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import mobvista.dmp.util.MRUtils;
import mobvista.prd.datasource.util.GsonUtil;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.Text;
import org.apache.poi.ss.usermodel.*;
import org.codehaus.jackson.map.ObjectMapper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.*;
import java.nio.charset.Charset;
import java.util.*;

/**
 * Created by fl on 2017/6/13.
 */
public class Test {
    // Scratch buffer. reduce() resets it with setLength(0) for every record; the code that
    // used to append to it is commented out, so it currently carries no data.
    StringBuilder builder = new StringBuilder();
    // Jackson mapper used by parse() and reduce() to serialize collections with
    // writeValueAsString.
    private ObjectMapper objectMapper = new ObjectMapper();

    // URL prefix; parse() appends the package name to it to form the app's detail-page URL.
    private final static String GOOGLE_URL = "https://play.google.com/store/apps/details?id=";

    /**
     * Ad-hoc comparison of two local tab-separated files.
     *
     * <p>Collects the distinct first-column values of each file, prints the distinct count for
     * {@code segment_tag} followed by the count for {@code dim_dc_segemnt}, and then prints
     * every first-column value of {@code dim_dc_segemnt} that is absent from
     * {@code segment_tag}.
     *
     * @param args ignored
     * @throws Exception if either file cannot be read
     */
    public static void main(String[] args) throws Exception {
        List<String> tagLines = Files.readLines(new File("/Users/fl/Downloads/segment_tag"), Charset.defaultCharset());
        List<String> dimLines = Files.readLines(new File("/Users/fl/Downloads/dim_dc_segemnt"), Charset.defaultCharset());

        // Only the first tab-separated column of each line takes part in the comparison.
        Set<String> dimKeys = new HashSet<>();
        for (String line : dimLines) {
            dimKeys.add(line.split("\t")[0]);
        }

        Set<String> tagKeys = new HashSet<>();
        for (String line : tagLines) {
            tagKeys.add(line.split("\t")[0]);
        }

        System.out.println(tagKeys.size());
        System.out.println(dimKeys.size());

        // Report the keys that exist only in the dim file.
        for (String candidate : dimKeys) {
            if (tagKeys.contains(candidate)) {
                continue;
            }
            System.out.println(candidate);
        }

    }

    /**
     * Renders a spreadsheet cell as text.
     *
     * <p>A {@code null} cell yields the empty string. A cell whose type code is {@code 0} is
     * read as a number and truncated to a {@code long} before being formatted; any other cell
     * is read through {@link Cell#getStringCellValue()}.
     *
     * @param cell the cell to read, may be {@code null}
     * @return the textual value, never {@code null} for a {@code null} cell
     */
    public static String getCellValue(Cell cell){
        if (cell == null) {
            return "";
        }
        // NOTE(review): 0 is presumably POI 3.x's numeric cell-type code
        // (Cell.CELL_TYPE_NUMERIC) -- confirm against the POI version on the classpath.
        boolean numeric = cell.getCellType() == 0;
        return numeric
                ? String.valueOf((long) cell.getNumericCellValue())
                : cell.getStringCellValue();
    }

    /**
     * Demo: binary-searches a fixed sorted array for the last occurrence of {@code 6} and
     * prints its index, or {@code "no found"} when the key is absent.
     */
    public void search() {
        int[] nums = {1, 2, 3, 4, 6, 6, 7, 8, 9, 10};
        int key = 6;

        int index = lastIndexOfKey(nums, key);
        if (index >= 0) {
            System.out.println(index);
        } else {
            System.out.println("no found");
        }
    }

    /**
     * Returns the index of the last element equal to {@code key} in an ascending-sorted array.
     *
     * @param nums array sorted in ascending order; may be {@code null} or empty
     * @param key  value to look for
     * @return the highest index holding {@code key}, or {@code -1} if it is not present
     */
    private static int lastIndexOfKey(int[] nums, int key) {
        if (nums == null || nums.length == 0) {
            return -1;
        }
        int left = 0;
        int right = nums.length - 1;
        while (left < right) {
            // Upper midpoint: because the "<=" branch keeps mid (left = mid), mid must be
            // strictly greater than left whenever left < right, otherwise the range never
            // shrinks. Halving (>>> 1) guarantees that; the previous ">> 2" quartered the
            // span and could leave mid == left, looping forever on short ranges.
            int mid = left + ((right - left + 1) >>> 1);
            if (nums[mid] <= key) {
                left = mid;
            } else {
                right = mid - 1;
            }
        }
        return nums[left] == key ? left : -1;
    }


    /**
     * Extracts app metadata from a Google Play detail page.
     *
     * <p>The result is the following fields combined with {@code MRUtils.JOINER}, in order:
     * package name, app title, categories serialized by the object mapper as a list of
     * {@code {"code", "name"}} maps, review count, download count, and the detail-page URL.
     * Review count and download count fall back to {@code "0"} when the page does not
     * contain them.
     *
     * @param packageName the app's package name; echoed in the output and appended to the
     *                    detail-page URL prefix
     * @param html        the page markup, may be {@code null}
     * @return the joined record, or {@code null} when {@code html} is {@code null} or the page
     *         lacks the expected main-content / info-box / title structure
     * @throws IOException if the category list cannot be serialized
     */
    public String parse(String packageName, String html) throws IOException {
        if (html == null) {
            return null;
        }
        Element mainContent = Jsoup.parse(html).select("div[class=main-content]").first();
        if (mainContent == null) {
            // Not a detail page (or the layout changed): nothing to extract.
            return null;
        }
        Element infoBoxTop = mainContent.select("div[class=info-box-top]").first();
        if (infoBoxTop == null) {
            return null;
        }
        Element titleElement = infoBoxTop.select("div[class=id-app-title]").first();
        if (titleElement == null) {
            return null;
        }
        String appName = titleElement.text();
        String url = GOOGLE_URL + packageName;

        Elements cateElements = infoBoxTop.select("a[class=document-subtitle category]");
        List<Map<String, String>> categoryList = Lists.newArrayList();
        for (Element category : cateElements) {
            String code = category.attr("href").replace("/store/apps/category/", "");
            // The name normally sits in the link's first child element; fall back to the
            // link's own text instead of throwing when the link has no child elements.
            String name = category.children().isEmpty()
                    ? category.text()
                    : category.child(0).text();
            Map<String, String> map = Maps.newHashMap();
            map.put("code", code);
            map.put("name", name);
            categoryList.add(map);
        }

        // Number of user ratings. The selector previously lacked its closing ']'.
        Elements elements = mainContent.select("span[class=reviews-num]");
        String userRatingCount = elements.isEmpty() ? "0" : elements.first().text();

        // Download count lives in the metadata section; the selector previously lacked its
        // closing ']'. A missing section is treated like a missing count.
        Element otherInfoElement = mainContent.select("div[class=details-section metadata]").first();
        String download = "0";
        if (otherInfoElement != null) {
            elements = otherInfoElement.select("div[itemprop=numDownloads]");
            if (!elements.isEmpty()) {
                download = elements.first().text();
            }
        }

        return MRUtils.JOINER.join(
                packageName,
                appName,
                objectMapper.writeValueAsString(categoryList),
                userRatingCount,
                download,
                url
        );
    }

    /**
     * Computes the BKDR string hash of {@code deviceId} with seed 131.
     *
     * <p>Each step applies {@code hash = 131 * hash + c}. The previous code used {@code +=},
     * which silently turned the multiplier into 132 (an even number that discards low-bit
     * entropy) and so did not implement BKDR. Arithmetic wraps on {@code int} overflow, so
     * the result may be negative; callers that need a bucket index should mask or normalize
     * the sign themselves.
     *
     * @param deviceId the string to hash, must not be {@code null}
     * @return the 32-bit hash; {@code 0} for the empty string
     */
    private static int BKDRHash(String deviceId) {
        int hash = 0;
        for (int i = 0; i < deviceId.length(); i++) {
            hash = 131 * hash + deviceId.charAt(i);
        }
        return hash;
    }

    /**
     * Splits an {@code int} into its four bytes in little-endian order.
     *
     * @param i the value to convert
     * @return a new 4-element array where index 0 holds the least significant byte and
     *         index 3 the most significant byte
     */
    public static byte[] intToBytes(int i) {
        byte[] value = new byte[4];
        for (int pos = 0; pos < value.length; pos++) {
            // Shift the wanted byte down to the low 8 bits; the cast drops everything above.
            value[pos] = (byte) (i >>> (pos * 8));
        }
        return value;
    }

    /**
     * Aggregates all records that share one key and prints the results to stdout.
     *
     * <p>Every value is split with {@code MRUtils.SPLITTER}; values that do not have exactly
     * 16 columns are skipped. For each accepted record a detail line (columns 14, 2, 11, 12
     * and 13) is printed. The first accepted record also seeds a 13-slot summary: slot 0 is
     * the key and slots 1..12 are columns 0..11. After all values are seen, slot 9 is
     * replaced by the serialized set of column-8 values, slot 10 by the comma-joined
     * non-empty column-9 values, and slot 12 by the serialized set of segment ids parsed from
     * column 15; the summary is then printed tab-joined. Nothing is printed as a summary when
     * no record was accepted.
     *
     * <p>The commented-out code this replaces wrote both kinds of line through
     * {@code context.write}; here they are only printed.
     *
     * @param key    grouping key (noted in the original as {@code device_id \t device_type})
     * @param values the raw records for that key
     * @throws IOException          if a set cannot be serialized
     * @throws InterruptedException declared for signature compatibility
     */
    protected void reduce(Text key, Set<Text> values) throws IOException, InterruptedException {
        Set<String> packages = Sets.newHashSet();
        Set<String> androidIds = Sets.newHashSet();
        Set<Integer> segmentIds = Sets.newHashSet();
        String[] summary = null;

        for (Text value : values) {
            String[] cols = MRUtils.SPLITTER.split(value.toString(), -1);
            if (cols.length != 16) {
                continue;
            }

            if (summary == null) {
                // Seed the summary from the first well-formed record.
                summary = new String[13];
                summary[0] = key.toString();
                System.arraycopy(cols, 0, summary, 1, 12);
            }

            packages.add(cols[8]);
            String androidId = cols[9];
            if (!androidId.isEmpty()) {
                androidIds.add(androidId);
            }

            // The buffer is still reset per record although the key-building code that
            // filled it (output path, key, platform column) is disabled.
            builder.setLength(0);

            String detail = MRUtils.JOINER.join(
                    cols[14], // time
                    cols[2],  // ip
                    cols[11], // geo
                    cols[12], // longitude
                    cols[13]  // latitude
            );
            System.out.println("outValue = " + detail);

            // Column 15 carries segments; only a JSON array is parsed, each element
            // contributing its integer "id".
            String segment = cols[15];
            if (segment.startsWith("[")) {
                JsonArray parsedSegments = GsonUtil.String2JsonArray(segment);
                for (JsonElement element : parsedSegments) {
                    segmentIds.add(element.getAsJsonObject().get("id").getAsInt());
                }
            }
        }

        if (summary == null) {
            return;
        }
        summary[9] = objectMapper.writeValueAsString(packages);
        summary[10] = MRUtils.join(androidIds, ",");
        summary[12] = objectMapper.writeValueAsString(segmentIds);
        System.out.println("outValue = " + MRUtils.join(summary, "\t"));
    }
}
