package mobvista.dmp.datasource.apptag.crawler;

import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Files;
import com.google.common.io.LineProcessor;
import mobvista.dmp.util.HttpUtil;
import mobvista.dmp.util.MRUtils;
import org.apache.commons.cli.*;
import org.apache.log4j.Logger;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
import java.util.logging.Level;
import java.util.regex.Pattern;

/**
 * Crawls app detail pages for a list of package names and forwards the responses to
 * per-platform parsers.
 *
 * <p>Each input line is split with {@code MRUtils.SPLITTER} and must yield exactly two fields:
 * the package name and the platform it was reported for. The platform is normalised to one of
 * the keys {@code "adr"}, {@code "ios"} or {@code "bundle"}, which selects both the
 * {@link DetailVisitor} used to fetch the page and the {@link DetailParser} that receives it.
 *
 * <p>A fixed set of reusable {@link Task} slots, kept in a bounded queue, limits the number of
 * requests that can be in flight at once: the reading thread blocks in {@code post} until a slot
 * is free.
 *
 * author: houying
 * date  : 16-11-3
 */
public class AppInfoCrawler {
    private static final Logger logger = Logger.getLogger(AppInfoCrawler.class);
    private static final int THREAD_NUM = 4;
    /** Number of reusable task slots; also the capacity of {@link #taskQueue}. */
    private static final int TASK_SLOTS = THREAD_NUM * 10;
    /** Dotted identifier, e.g. an Android package name or an iOS bundle id. */
    private static final Pattern adrPkgPtn = Pattern.compile("^\\w+\\..*$");
    /** Purely numeric identifier with at least eight digits. */
    private static final Pattern iosPkgPtn = Pattern.compile("^\\d{8,}$");
    /**
     * Numeric identifier carrying an "id" prefix. Only digits may follow the prefix so that
     * ordinary names that merely start with "id" (e.g. "idle.game") are left untouched.
     */
    private static final Pattern fixPkgPtn = Pattern.compile("^id\\d+$");
    private final String adrFile;
    private final String iosFile;
    private final String bundleFile;
    private final String input;
    private final boolean useProxy;
    private final String date;
    private ExecutorService pool;
    private BlockingQueue<Task> taskQueue;
    private Map<String, DetailParser> parserMap;
    private Map<String, DetailVisitor> visitorMap;
    private List<Thread> threads;

    /**
     * @param input      path of the file listing package names and platforms
     * @param iosFile    value handed to the parser registered under "ios"
     * @param adrFile    value handed to the parser registered under "adr"
     * @param bundleFile value handed to the parser registered under "bundle"
     * @param date       date value handed to every parser
     * @param useProxy   flag handed to the {@code ProxyManager} shared by all visitors
     */
    public AppInfoCrawler(String input, String iosFile, String adrFile, String bundleFile, String date, boolean useProxy) {
        this.input = input;
        this.iosFile = iosFile;
        this.adrFile = adrFile;
        this.bundleFile = bundleFile;
        this.date = date;
        this.useProxy = useProxy;
    }

    /**
     * A reusable unit of work: fetch one package and pass the response to its parser.
     * When finished, the task puts itself back into {@link #taskQueue} so it can be reused.
     */
    class Task implements Callable<Void> {
        private String packageName;
        private DetailVisitor visitor;
        private DetailParser parser;

        /** Prepares this slot for its next run. */
        public void init(String packageName, DetailVisitor visitor, DetailParser parser) {
            this.packageName = packageName;
            this.visitor = visitor;
            this.parser = parser;
        }

        /**
         * Fetches the package and posts a non-null result to the parser. Failures are logged
         * rather than propagated, because the {@code Future} returned by the pool is never read.
         *
         * @return always {@code null}
         */
        @Override
        public Void call() throws InterruptedException {
            try {
                HttpUtil.HttpResult httpResult = visitor.visit(packageName);
                if (httpResult != null) {
                    parser.post(packageName, httpResult);
                }
            } catch (IOException e) {
                logger.warn("failed to crawl package: " + packageName, e);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the executor instead of swallowing it.
                Thread.currentThread().interrupt();
                logger.warn("interrupted while crawling package: " + packageName, e);
            } catch (RuntimeException e) {
                logger.error("unexpected error while crawling package: " + packageName, e);
            } finally {
                // The queue capacity equals the number of Task slots, so offer() always succeeds.
                // Unlike put(), it is not interruptible, so the slot is never lost.
                taskQueue.offer(this);
            }
            return null;
        }
    }

    /**
     * Creates the worker pool, the task slots, the parsers and visitors, and starts one thread
     * per parser.
     */
    private void init() throws InterruptedException, IOException {
        pool = Executors.newFixedThreadPool(THREAD_NUM * 4);
        // Initialise the task slots.
        taskQueue = new ArrayBlockingQueue<>(TASK_SLOTS);
        for (int i = 0; i < TASK_SLOTS; i++) {
            taskQueue.put(new Task());
        }

        parserMap = Maps.newHashMap();
        parserMap.put("ios", new IosDetailParser(iosFile, date));
        parserMap.put("bundle", new IosDetailParser(bundleFile, date));
        parserMap.put("adr", new AdrDetailParser(adrFile, date));

        // Create the proxy manager shared by the HTTP visitors.
        ProxyManager proxyManager = new ProxyManager(useProxy);
        visitorMap = Maps.newHashMap();
        visitorMap.put("ios", new IosDetailVisitor(proxyManager));
        visitorMap.put("adr", new AdrDetailVisitor(proxyManager));
        visitorMap.put("bundle", new BundleVisitor(proxyManager));
        // Start the parsers, which process the crawled results.
        threads = Lists.newArrayList();
        for (DetailParser parser : parserMap.values()) {
            Thread thread = new Thread(parser);
            thread.start();
            threads.add(thread);
        }
    }

    /**
     * Reads the input file, submits one crawl task per usable line, waits for all tasks to
     * finish, then stops the parsers and joins their threads.
     *
     * @throws InterruptedException if the calling thread is interrupted while submitting tasks
     *                              or while waiting for the workers or parsers to finish
     * @throws IOException          if initialisation or reading the input file fails
     */
    public void run() throws InterruptedException, IOException {
        init();
        try {
            int handleNum = Files.readLines(new File(input), Charsets.UTF_8, new LineProcessor<Integer>() {
                int handleNum = 0;

                @Override
                public boolean processLine(String line) throws IOException {
                    String[] array = MRUtils.SPLITTER.split(line);
                    if (array == null || array.length != 2) {
                        return true;
                    }
                    String packageName = fixPackageName(array[0]);
                    String originPlatform = array[1];
                    if (packageName.isEmpty()) {
                        return true;
                    }
                    String platform = fixPlatform(packageName, originPlatform);
                    if (platform == null) {
                        logger.info("[can not ensure platform for package_name: " + packageName + "]");
                        return true;
                    }
                    if (!platform.equals(originPlatform)) {
                        logger.info("[package name: " + packageName + ", change platform " + originPlatform + " to " + platform + "]");
                    }
                    try {
                        post(packageName, platform);
                        handleNum++;
                        if (handleNum % 1000 == 0) {
                            logger.info("handed " + handleNum);
                        }
                    } catch (InterruptedException e) {
                        // Stop reading and leave the interrupt flag set for the caller.
                        Thread.currentThread().interrupt();
                        return false;
                    }
                    return true;
                }

                @Override
                public Integer getResult() {
                    return handleNum;
                }
            });
            System.out.println("handle package name count: " + handleNum);

            // shutdown() lets every already-submitted task run to completion; polling
            // getActiveCount() is only approximate and can miss tasks still waiting in the
            // executor's queue.
            pool.shutdown();
            while (!pool.awaitTermination(1, TimeUnit.SECONDS)) {
                // keep waiting until all submitted tasks have finished
            }
        } finally {
            // Always release the workers and parser threads, even on failure, so the JVM can exit.
            pool.shutdownNow();
            for (DetailParser parser : parserMap.values()) {
                parser.stop();
            }
            for (Thread thread : threads) {
                thread.join();
            }
        }
    }

    /**
     * Strips a leading "id" from identifiers of the form {@code id<digits>}; any other name is
     * returned unchanged.
     */
    private String fixPackageName(String packageName) {
        return fixPkgPtn.matcher(packageName).matches() ? packageName.substring(2) : packageName;
    }

    /**
     * Derives the crawler platform key from the shape of the package name and the reported
     * platform.
     *
     * <ul>
     *   <li>dotted name + "adr"/"android" (any case) gives "adr"</li>
     *   <li>dotted name + "ios" (any case) gives "bundle"</li>
     *   <li>numeric name of at least eight digits gives "ios", whatever the reported platform</li>
     * </ul>
     *
     * @return the platform key, or {@code null} when none of the rules applies
     */
    private String fixPlatform(String packageName, String platform) {
        if (adrPkgPtn.matcher(packageName).matches()) {
            // equalsIgnoreCase is locale-independent, unlike toLowerCase() with the default locale.
            if ("adr".equalsIgnoreCase(platform) || "android".equalsIgnoreCase(platform)) {
                return "adr";
            }
            if ("ios".equalsIgnoreCase(platform)) {
                return "bundle";
            }
            return null;
        }
        if (iosPkgPtn.matcher(packageName).matches()) {
            return "ios";
        }
        return null;
    }

    /**
     * Takes a free task slot (blocking until one is available), binds it to the package and
     * submits it to the worker pool.
     */
    private void post(String packageName, String platform) throws InterruptedException {
        Task task = taskQueue.take();
        DetailVisitor visitor = visitorMap.get(platform);
        DetailParser parser = parserMap.get(platform);
        task.init(packageName, visitor, parser);
        pool.submit(task);
    }

    /**
     * Fetches the detail information for a package.
     */
    public interface DetailVisitor {

        /**
         * @param packageName identifier of the app to fetch
         * @return the HTTP result; a {@code null} result is skipped by the crawler
         * @throws IOException if the request fails
         */
        HttpUtil.HttpResult visit(String packageName) throws IOException;
    }

    /**
     * Parses crawled results. Implementations are run on their own thread by the crawler.
     */
    public interface DetailParser extends Runnable {

        /** Parses the fetched content for the given package (details are implementation-defined). */
        String parse(String packageName, String html) throws IOException;

        /** Hands a crawl result to this parser; called from the crawler's worker threads. */
        void post(String packageName, HttpUtil.HttpResult httpResult) throws InterruptedException;

        /** Called once by the crawler after all crawl tasks are done, before joining the parser thread. */
        void stop();
    }

    /**
     * Command-line entry point.
     *
     * <p>Options: {@code -f} input file (required), {@code -i} ios output file, {@code -b} bundle
     * output file, {@code -a} adr output file, {@code -d} date, {@code -p} use proxy.
     */
    public static void main(String[] args) throws IOException, InterruptedException, ParseException {

        Options options = new Options();
        options.addOption("f", true, "package name input file");
        options.addOption("i", true, "ios output file");
        options.addOption("b", true, "bundle output file");
        options.addOption("a", true, "adr output file");
        options.addOption("p", false, "whether use proxy");
        options.addOption("d", true, "date");
        Parser parser = new PosixParser();
        CommandLine commandLine = parser.parse(options, args);
        String input = commandLine.getOptionValue("f");
        if (input == null) {
            // Fail with a clear message instead of a NullPointerException from new File(null).
            throw new ParseException("Missing required option: f (package name input file)");
        }
        String ios = commandLine.getOptionValue("i");
        String bundle = commandLine.getOptionValue("b");
        String adr = commandLine.getOptionValue("a");
        String date = commandLine.getOptionValue("d");
        boolean useProxy = commandLine.hasOption("p");
        new AppInfoCrawler(input, ios, adr, bundle, date, useProxy).run();
    }
}
