package mobvista.dmp.datasource.apptag.crawler;

import com.google.common.io.Closeables;
import mobvista.dmp.util.HttpUtil;
import mobvista.dmp.util.MRUtils;
import org.apache.log4j.Logger;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

/**
 * Base class for detail-page parsers. Fetched HTTP results are handed over through
 * {@link #post(String, HttpUtil.HttpResult)} into a bounded queue; {@link #run()} drains
 * that queue, turns each successful response into one output line via {@code parse(...)}
 * and writes the lines to the configured file.
 *
 * <p>{@code parse(...)} is not declared in this class; it is presumably declared on
 * {@code AppInfoCrawler.DetailParser} and implemented by subclasses.
 *
 * author: houying
 * date  : 16-11-4
 */
public abstract class AbstractDetailParser implements AppInfoCrawler.DetailParser {
    private static final Logger logger = Logger.getLogger(AbstractDetailParser.class);

    /** Capacity of the hand-off queue; {@link #post} blocks while this many results are pending. */
    private static final int QUEUE_CAPACITY = 1000000;

    /** Path of the output file written by {@link #run()}. */
    private final String file;
    public final String date;
    /** Results posted by the crawler and not yet consumed by {@link #run()}. */
    private final BlockingQueue<Pair<String, HttpUtil.HttpResult>> htmlQueue;
    /** Set by {@link #stop()}; {@link #run()} exits once this is true and the queue is empty. */
    private volatile boolean isStop;
    /**
     * Number of results skipped because the status code was not 200 or the content was null.
     * Plain {@code int}, updated from the thread executing {@link #run()}.
     */
    protected int notFound = 0;

    /**
     * @param file path of the file the parsed lines are written to
     * @param date date value exposed to subclasses through {@link #date}
     */
    public AbstractDetailParser(String file, String date) {
        this.file = file;
        this.date = date;
        this.htmlQueue = new ArrayBlockingQueue<>(QUEUE_CAPACITY);
    }

    /** Simple mutable two-element holder. */
    public static class Pair<K, V> {
        public K first;
        public V second;

        public Pair(K first, V second) {
            this.first = first;
            this.second = second;
        }
    }

    /**
     * Requests termination. {@link #run()} keeps going until every queued result has been
     * consumed, so this call only signals that no more work should be waited for.
     */
    @Override
    public void stop() {
        isStop = true;
        System.out.println(file + " stopped!");
    }

    /**
     * Queues one fetched result for parsing, blocking while the queue is full.
     *
     * @param packageName package the result belongs to
     * @param httpResult  HTTP response fetched for that package
     * @throws InterruptedException if interrupted while waiting for queue space
     */
    @Override
    public void post(String packageName, HttpUtil.HttpResult httpResult) throws InterruptedException {
        htmlQueue.put(new Pair<>(packageName, httpResult));
    }

    /**
     * Consumes queued results until {@link #stop()} has been called and the queue is empty,
     * writing one UTF-8 encoded line per successfully parsed result.
     *
     * <p>On interruption the loop ends immediately (results still queued are not processed),
     * the writer is closed and the thread's interrupt status is restored.
     */
    @Override
    public void run() {
        // NOTE(review): a stop() issued before run() begins is discarded by this reset, after
        // which the loop only ends through an interrupt or an exception - confirm callers
        // always start the parser before stopping it.
        isStop = false;
        // try-with-resources closes (and thereby flushes) the writer on every exit path.
        // The charset is explicit so the output does not depend on the platform default.
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            while (!isStop || !htmlQueue.isEmpty()) {
                // Poll with a timeout so the stop flag is re-checked at least once per second.
                Pair<String, HttpUtil.HttpResult> pair = htmlQueue.poll(1, TimeUnit.SECONDS);
                if (pair == null) {
                    continue;
                }
                String line = parseOrNull(pair);
                if (line == null) {
                    continue;
                }
                writer.write(line);
                writer.newLine();
            }
            writer.flush();
            logger.info(file + " not found count: " + notFound);
        } catch (IOException e) {
            logger.error("parse result fail", e);
        } catch (InterruptedException e) {
            // Restore the flag so whoever owns this thread can still observe the interrupt.
            Thread.currentThread().interrupt();
            logger.error("parse result fail", e);
        }
    }

    /**
     * Converts one queued result into an output line.
     *
     * @return the parsed line, or {@code null} when the result is skipped (status code other
     *         than 200, null content, parser returned null, or parsing threw)
     */
    private String parseOrNull(Pair<String, HttpUtil.HttpResult> pair) {
        try {
            HttpUtil.HttpResult result = pair.second;
            if (result.getStatusCode() != 200 || result.getContent() == null) {
                notFound++;
                return null;
            }
            return parse(pair.first, result.getContent());
        } catch (Exception e) {
            // A single bad page must not abort the whole run, so any parser failure is logged
            // (with the package name and stack trace) and the entry is skipped.
            logger.error("failed to parse detail page, package: " + pair.first, e);
            return null;
        }
    }
}
