package com.xxl.crawler;

import com.xxl.crawler.pageloader.PageLoader;
import com.xxl.crawler.pageparser.PageParser;
import com.xxl.crawler.proxy.ProxyPool;
import com.xxl.crawler.runconf.RunConf;
import com.xxl.crawler.rundata.RunUrlPool;
import com.xxl.crawler.rundata.strategy.LocalRunUrlPool;
import com.xxl.crawler.thread.CrawlerThread;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/xxl/crawler/XxlCrawler.class */
/**
 * Entry point of the crawler: holds the URL pool, the run configuration and the
 * worker threads, and controls their lifecycle through {@link #start(boolean)},
 * {@link #tryFinish()} and {@link #stop()}.
 *
 * <p>Instances are created and configured through {@link Builder}.
 */
public class XxlCrawler {
    private static final Logger logger = LoggerFactory.getLogger(XxlCrawler.class);

    /** Pool of URLs to crawl; defaults to a {@link LocalRunUrlPool}. */
    private volatile RunUrlPool runUrlPool = new LocalRunUrlPool();
    /** Run configuration (page loader, page parser, timeouts, ...). */
    private volatile RunConf runConf = new RunConf();
    /** Number of crawler threads to launch; validated in {@link #start(boolean)}. */
    private volatile int threadCount = 1;
    /** Executor that runs the crawler threads. */
    private volatile ExecutorService crawlers = Executors.newCachedThreadPool();
    /** Crawler threads created by {@link #start(boolean)}. */
    private volatile List<CrawlerThread> crawlerThreads = new CopyOnWriteArrayList<>();

    /**
     * Fluent builder for {@link XxlCrawler}.
     *
     * <p>Every setter writes straight into the single crawler instance owned by this
     * builder, and {@link #build()} returns that same instance on every call.
     */
    public static class Builder {
        private final XxlCrawler crawler = new XxlCrawler();

        /**
         * Replaces the crawler's URL pool.
         *
         * <p>{@link #setUrls(String...)} and {@link #setWhiteUrlRegexs(String...)} write
         * into whichever pool is current when they are called, so call this first when
         * a custom pool is wanted.
         *
         * @param runUrlPool the pool to use
         * @return this builder
         */
        public Builder setRunUrlPool(RunUrlPool runUrlPool) {
            this.crawler.runUrlPool = runUrlPool;
            return this;
        }

        /**
         * Adds the given URLs to the current URL pool.
         *
         * @param urls URLs to add; {@code null} or empty is a no-op
         * @return this builder
         */
        public Builder setUrls(String... urls) {
            if (urls != null) {
                for (String url : urls) {
                    // NOTE(review): the meaning of the 'false' flag is defined by RunUrlPool.addUrl.
                    this.crawler.runUrlPool.addUrl(url, false);
                }
            }
            return this;
        }

        /**
         * Adds the given white-list URL regexes to the current URL pool.
         *
         * @param whiteUrlRegexs regexes to add; {@code null} or empty is a no-op
         * @return this builder
         */
        public Builder setWhiteUrlRegexs(String... whiteUrlRegexs) {
            if (whiteUrlRegexs != null) {
                for (String whiteUrlRegex : whiteUrlRegexs) {
                    this.crawler.runUrlPool.addWhiteUrlRegex(whiteUrlRegex);
                }
            }
            return this;
        }

        /**
         * Sets the allow-spread flag on the run configuration.
         *
         * @param allowSpread value forwarded to {@code RunConf.setAllowSpread}
         * @return this builder
         */
        public Builder setAllowSpread(boolean allowSpread) {
            this.crawler.runConf.setAllowSpread(allowSpread);
            return this;
        }

        /**
         * Sets the page loader; {@link XxlCrawler#start(boolean)} rejects a {@code null} loader.
         *
         * @param pageLoader the page loader to use
         * @return this builder
         */
        public Builder setPageLoader(PageLoader pageLoader) {
            this.crawler.runConf.setPageLoader(pageLoader);
            return this;
        }

        /**
         * Sets the page parser; {@link XxlCrawler#start(boolean)} rejects a {@code null} parser.
         *
         * @param pageParser the page parser to use
         * @return this builder
         */
        public Builder setPageParser(PageParser pageParser) {
            this.crawler.runConf.setPageParser(pageParser);
            return this;
        }

        /**
         * Sets the parameter map on the run configuration.
         *
         * @param paramMap value forwarded to {@code RunConf.setParamMap}
         * @return this builder
         */
        public Builder setParamMap(Map<String, String> paramMap) {
            this.crawler.runConf.setParamMap(paramMap);
            return this;
        }

        /**
         * Sets the header map on the run configuration.
         *
         * @param headerMap value forwarded to {@code RunConf.setHeaderMap}
         * @return this builder
         */
        public Builder setHeaderMap(Map<String, String> headerMap) {
            this.crawler.runConf.setHeaderMap(headerMap);
            return this;
        }

        /**
         * Sets the cookie map on the run configuration.
         *
         * @param cookieMap value forwarded to {@code RunConf.setCookieMap}
         * @return this builder
         */
        public Builder setCookieMap(Map<String, String> cookieMap) {
            this.crawler.runConf.setCookieMap(cookieMap);
            return this;
        }

        /**
         * Appends the given user agents to the run configuration's user-agent list,
         * skipping values the list already contains.
         *
         * @param userAgents user agents to add; {@code null} or empty is a no-op
         * @return this builder
         */
        public Builder setUserAgent(String... userAgents) {
            if (userAgents != null) {
                for (String userAgent : userAgents) {
                    List<String> userAgentList = this.crawler.runConf.getUserAgentList();
                    if (!userAgentList.contains(userAgent)) {
                        userAgentList.add(userAgent);
                    }
                }
            }
            return this;
        }

        /**
         * Sets the referrer on the run configuration.
         *
         * @param referrer value forwarded to {@code RunConf.setReferrer}
         * @return this builder
         */
        public Builder setReferrer(String referrer) {
            this.crawler.runConf.setReferrer(referrer);
            return this;
        }

        /**
         * Sets the if-post flag on the run configuration.
         *
         * @param ifPost value forwarded to {@code RunConf.setIfPost}
         * @return this builder
         */
        public Builder setIfPost(boolean ifPost) {
            this.crawler.runConf.setIfPost(ifPost);
            return this;
        }

        /**
         * Sets the timeout in milliseconds; {@link XxlCrawler#start(boolean)} requires
         * a value in {@code (0, 300000]}.
         *
         * @param timeoutMillis timeout in milliseconds
         * @return this builder
         */
        public Builder setTimeoutMillis(int timeoutMillis) {
            this.crawler.runConf.setTimeoutMillis(timeoutMillis);
            return this;
        }

        /**
         * Sets the validate-TLS-certificates flag on the run configuration.
         *
         * @param validateTLSCertificates value forwarded to
         *        {@code RunConf.setValidateTLSCertificates}
         * @return this builder
         */
        public Builder setValidateTLSCertificates(boolean validateTLSCertificates) {
            this.crawler.runConf.setValidateTLSCertificates(validateTLSCertificates);
            return this;
        }

        /**
         * Sets the proxy pool on the run configuration.
         *
         * @param proxyPool value forwarded to {@code RunConf.setProxyPool}
         * @return this builder
         */
        public Builder setProxyPool(ProxyPool proxyPool) {
            this.crawler.runConf.setProxyPool(proxyPool);
            return this;
        }

        /**
         * Sets the pause in milliseconds; {@link XxlCrawler#start(boolean)} requires
         * a value in {@code [0, 600000]}.
         *
         * @param pauseMillis pause in milliseconds
         * @return this builder
         */
        public Builder setPauseMillis(int pauseMillis) {
            this.crawler.runConf.setPauseMillis(pauseMillis);
            return this;
        }

        /**
         * Sets the fail-retry count. Non-positive values are ignored and leave the
         * current setting untouched.
         *
         * @param failRetryCount retry count; applied only when {@code > 0}
         * @return this builder
         */
        public Builder setFailRetryCount(int failRetryCount) {
            if (failRetryCount > 0) {
                this.crawler.runConf.setFailRetryCount(failRetryCount);
            }
            return this;
        }

        /**
         * Sets the number of crawler threads; {@link XxlCrawler#start(boolean)} requires
         * a value in {@code [1, 1000]}.
         *
         * @param threadCount number of crawler threads
         * @return this builder
         */
        public Builder setThreadCount(int threadCount) {
            this.crawler.threadCount = threadCount;
            return this;
        }

        /**
         * Returns the configured crawler.
         *
         * @return the crawler instance owned by this builder (the same one on every call)
         */
        public XxlCrawler build() {
            return this.crawler;
        }
    }

    /**
     * @return the URL pool used by this crawler
     */
    public RunUrlPool getRunUrlPool() {
        return this.runUrlPool;
    }

    /**
     * @return the run configuration used by this crawler
     */
    public RunConf getRunConf() {
        return this.runConf;
    }

    /**
     * Validates the configuration, then creates {@code threadCount} crawler threads
     * and submits them to the executor.
     *
     * @param sync when {@code true}, blocks until the executor has terminated, logging
     *        a progress line every 5 seconds; when {@code false}, returns right after
     *        the threads have been submitted
     * @throws RuntimeException if the URL pool is {@code null} or empty, the run
     *         configuration, page loader or page parser is {@code null}, or
     *         threadCount, timeoutMillis, pauseMillis or failRetryCount is out of range
     */
    public void start(boolean sync) {
        if (this.runUrlPool == null) {
            throw new RuntimeException("xxl crawler runUrlPool can not be null.");
        }
        if (this.runUrlPool.getUrlNum() <= 0) {
            throw new RuntimeException("xxl crawler indexUrl can not be empty.");
        }
        if (this.runConf == null) {
            throw new RuntimeException("xxl crawler runConf can not be empty.");
        }
        if (this.threadCount < 1 || this.threadCount > 1000) {
            throw new RuntimeException("xxl crawler threadCount invalid, threadCount : " + this.threadCount);
        }
        if (this.runConf.getPageLoader() == null) {
            throw new RuntimeException("xxl crawler pageLoader can not be null.");
        }
        if (this.runConf.getPageParser() == null) {
            throw new RuntimeException("xxl crawler pageParser can not be null.");
        }
        if (this.runConf.getTimeoutMillis() <= 0 || this.runConf.getTimeoutMillis() > 300000) {
            throw new RuntimeException("xxl crawler timeoutMillis invalid.");
        }
        if (this.runConf.getPauseMillis() < 0 || this.runConf.getPauseMillis() > 600000) {
            throw new RuntimeException("xxl crawler pauseMillis invalid.");
        }
        if (this.runConf.getFailRetryCount() < 0 || this.runConf.getFailRetryCount() > 100) {
            throw new RuntimeException("xxl crawler failRetryCount invalid.");
        }

        logger.info(">>>>>>>>>>> xxl crawler start ...");
        for (int i = 0; i < this.threadCount; i++) {
            this.crawlerThreads.add(new CrawlerThread(this));
        }
        for (CrawlerThread crawlerThread : this.crawlerThreads) {
            this.crawlers.execute(crawlerThread);
        }
        // No further tasks are submitted; already-submitted crawler threads keep running.
        this.crawlers.shutdown();

        if (sync) {
            try {
                // awaitTermination is the call that throws InterruptedException,
                // so the whole wait loop has to live inside the try block.
                while (!this.crawlers.awaitTermination(5L, TimeUnit.SECONDS)) {
                    logger.info(">>>>>>>>>>> xxl crawler still running ...");
                }
            } catch (InterruptedException e) {
                logger.error(e.getMessage(), e);
                // Restore the interrupt flag so the caller can observe the interruption.
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Stops the crawler when there is nothing left to do: the URL pool reports zero
     * URLs and no crawler thread reports itself as running. Otherwise does nothing.
     */
    public void tryFinish() {
        boolean anyRunning = false;
        for (CrawlerThread crawlerThread : this.crawlerThreads) {
            if (crawlerThread.isRunning()) {
                anyRunning = true;
                break;
            }
        }
        if (this.runUrlPool.getUrlNum() == 0 && !anyRunning) {
            stop();
        }
    }

    /**
     * Signals every crawler thread to stop and shuts the executor down immediately.
     */
    public void stop() {
        for (CrawlerThread crawlerThread : this.crawlerThreads) {
            crawlerThread.toStop();
        }
        this.crawlers.shutdownNow();
        logger.info(">>>>>>>>>>> xxl crawler stop.");
    }
}
