package mobvista.dmp.datasource.apptag.crawler_spark;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import mobvista.dmp.util.HttpUtil;
import mobvista.dmp.util.MRUtils;
import org.apache.http.HttpHost;
import org.codehaus.jackson.map.ObjectMapper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * Fetches the Google Play detail page of an Android package and flattens the
 * fields of interest (app name, categories, rating count, download count,
 * developer name, content rating) into a single record joined with
 * {@link MRUtils#JOINER}.
 *
 * <p>Two page layouts are handled: pages whose HTML contains the marker
 * {@code main-content} are parsed by {@link #parseNormal(String, String)},
 * every other page by {@link #parseNoNormal(String, String)}.
 *
 * author: houying
 * date  : 16-11-3
 */
public class AdrDetailVisitor  {
    /** Prefix of the detail page URL; the package name is appended to it. */
    private static final String GOOGLE_URL = "https://play.google.com/store/apps/details?id=";

    private final HttpHost httpHost;
    private final String date;
    /** Number of requests that did not return HTTP 200 with a body. */
    protected int notFound = 0;
    private final ObjectMapper objectMapper;

    /**
     * @param httpHost host handed to {@code HttpUtil.doGet} for every request
     * @param date     value written verbatim into the date column of every record
     */
    public AdrDetailVisitor(HttpHost httpHost, String date) {
        this.httpHost = httpHost;
        this.objectMapper = new ObjectMapper();
        this.date = date;
    }

    /**
     * Downloads and parses the detail page of {@code packageName}.
     *
     * @param packageName Android package id appended to the detail page URL
     * @return the joined record; {@code null} when the response status is not 200
     *         or the response has no content (in which case {@link #notFound} is
     *         incremented); the empty string when fetching or parsing fails with
     *         an exception
     * @throws IOException declared for interface compatibility; failures are
     *                     currently reported on stderr and mapped to {@code ""}
     */
    public String visit(String packageName) throws IOException {
        try {
            HttpUtil.HttpResult result = HttpUtil.doGet(GOOGLE_URL + packageName, httpHost);
            if (result.getStatusCode() != 200 || result.getContent() == null) {
                notFound++;
                return null;
            }
            return parse(packageName, result.getContent());
        } catch (Exception e) {
            // Best effort: one broken page must not abort the whole crawl, but say
            // which package failed so the stack trace can be attributed.
            System.err.println("Failed to crawl detail page for package: " + packageName);
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Dispatches to the parser matching the page layout.
     *
     * @param packageName Android package id the page belongs to
     * @param html        raw page HTML, may be {@code null}
     * @return the joined record, or {@code null} when {@code html} is {@code null}
     * @throws IOException if the page lacks a required element or the category
     *                     list cannot be serialized to JSON
     */
    public String parse(String packageName, String html) throws IOException {
        if (html == null) {
            return null;
        }
        if (html.contains("main-content")) {
            return parseNormal(packageName, html);
        } else {
            return parseNoNormal(packageName, html);
        }
    }


    /**
     * Parses a page whose HTML contains the {@code main-content} container.
     *
     * <p>The developer name and content rating columns are left empty for this layout.
     *
     * @param packageName Android package id the page belongs to
     * @param html        raw page HTML
     * @return the joined record
     * @throws IOException if the main content, info box or app title element is
     *                     missing, or the category list cannot be serialized
     */
    public String parseNormal(String packageName, String html) throws IOException {
        Element mainContent = requireFirst(
                Jsoup.parse(html).select("div[class=main-content]"), "div.main-content", packageName);
        Element infoBoxTop = requireFirst(
                mainContent.select("div[class=info-box-top]"), "div.info-box-top", packageName);
        String appName = requireFirst(
                infoBoxTop.select("div[class=id-app-title]"), "div.id-app-title", packageName).text();

        List<Map<String, String>> categoryList = Lists.newArrayList();
        for (Element category : infoBoxTop.select("a[class=document-subtitle category]")) {
            String code = category.attr("href").replace("/store/apps/category/", "");
            // The name normally sits in the first child; fall back to the link text
            // instead of failing with an index error when the link has no children.
            String name = category.children().isEmpty()
                    ? category.text()
                    : category.child(0).text();
            categoryList.add(categoryEntry(code, name));
        }

        // Number of user ratings; "0" when the page does not show one.
        String userRatingCount = firstTextOr(mainContent.select("span[class=reviews-num]"), "0");

        // A missing metadata section is treated like a missing download element.
        String download = "0";
        Element otherInfoElement = mainContent.select("div[class=details-section metadata]").first();
        if (otherInfoElement != null) {
            download = firstTextOr(otherInfoElement.select("div[itemprop=numDownloads]"), "0");
        }

        return MRUtils.JOINER.join(
                packageName,
                appName,
                objectMapper.writeValueAsString(categoryList),
                userRatingCount,
                download,
                GOOGLE_URL + packageName,
                date,
                "",
                ""
        );
    }

    /**
     * Parses a page whose HTML does not contain the {@code main-content} container.
     *
     * @param packageName Android package id the page belongs to
     * @param html        raw page HTML
     * @return the joined record
     * @throws IOException if the root container or the app name element is
     *                     missing, or the category list cannot be serialized
     */
    public String parseNoNormal(String packageName, String html) throws IOException  {
        Element element = requireFirst(Jsoup.parse(html).select("body div"), "body div", packageName);
        String appName = requireFirst(
                element.select("h1[itemprop='name']"), "h1[itemprop=name]", packageName).text();

        List<Map<String, String>> categoryList = Lists.newArrayList();
        for (Element category : element.select("a[itemprop='genre']")) {
            // The href prefix changes too often for a fixed-prefix replace to be
            // reliable, so the category code is taken from the last path segment.
            String[] codes = category.attr("href").split("\\/");
            if (codes.length == 0) {
                // href consisted only of slashes: split() drops trailing empty
                // segments and returns an empty array, so there is no code.
                continue;
            }
            String code = codes[codes.length - 1];
            String name = category.text();
            if (code != null && !"".equals(code.trim()) && name != null && !"".equals(name)) {
                categoryList.add(categoryEntry(code, name));
            }
        }

        // Number of user ratings: prefer the meta tag, otherwise fall back to the
        // visible counter with its thousands separators removed.
        String userRatingCount = "";
        Elements rateElements = element.select("meta[itemprop='ratingCount']");
        if (rateElements.size() == 0) {
            Element visibleCount = element.select("span[class='AYi5wd TBRnV']").first();
            if (visibleCount != null) {
                userRatingCount = visibleCount.text().replaceAll(",", "");
            }
        } else {
            userRatingCount = rateElements.first().attr("content");
        }

        // The download count is read from the third entry of the additional-info list.
        String download = "0";
        Elements otherInfoElements = element.select("c-wiz[jsrenderer='Wnurre'] div[class='hAyfc']");
        if (otherInfoElements.size() >= 3) {
            download = firstTextOr(otherInfoElements.get(2).select("span[class='htlgb']"), "0");
        }

        String artistName = firstTextOr(element.select("a[class='hrTbp R8zArc']"), "");

        // Elements.attr yields "" when no matched element carries the attribute.
        String contentAdvisoryRating = element.select("img[class='T75of E1GfKc']").attr("alt");

        return MRUtils.JOINER.join(
                packageName,
                appName,
                objectMapper.writeValueAsString(categoryList),
                userRatingCount,
                download,
                GOOGLE_URL + packageName,
                date,
                artistName,
                contentAdvisoryRating
        );
    }

    /** Returns the first matched element or fails with a message naming what is missing. */
    private static Element requireFirst(Elements elements, String what, String packageName)
            throws IOException {
        Element first = elements.first();
        if (first == null) {
            throw new IOException(
                    "Unexpected detail page layout for " + packageName + ": missing " + what);
        }
        return first;
    }

    /** Returns the text of the first matched element, or {@code fallback} when nothing matched. */
    private static String firstTextOr(Elements elements, String fallback) {
        Element first = elements.first();
        return first == null ? fallback : first.text();
    }

    /** Builds one category entry with the keys {@code code} and {@code name}. */
    private static Map<String, String> categoryEntry(String code, String name) {
        Map<String, String> entry = Maps.newHashMap();
        entry.put("code", code);
        entry.put("name", name);
        return entry;
    }
}
