package com.xxl.crawler.thread;

import com.xxl.crawler.XxlCrawler;
import com.xxl.crawler.annotation.PageFieldSelect;
import com.xxl.crawler.annotation.PageSelect;
import com.xxl.crawler.constant.Const;
import com.xxl.crawler.exception.XxlCrawlerException;
import com.xxl.crawler.pageloader.param.Request;
import com.xxl.crawler.pageloader.param.Response;
import com.xxl.crawler.util.FieldReflectionUtil;
import com.xxl.crawler.util.JsoupUtil;
import com.xxl.crawler.util.UrlUtil;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.net.Proxy;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/xxl/crawler/thread/CrawlerThread.class */
/**
 * Worker loop of a crawler: repeatedly takes a URL from the crawler's URL pool, loads the page,
 * optionally spreads to the links found on it, maps the page onto page-VO objects using the
 * {@link PageSelect} / {@link PageFieldSelect} annotations and hands the result to the configured
 * page parser callbacks.
 *
 * <p>The loop ends once {@link #toStop()} has been called. {@link #isRunning()} reports
 * {@code false} while the worker is between URLs (calling {@code tryFinish()} and fetching the
 * next URL) and {@code true} otherwise.
 */
public class CrawlerThread implements Runnable {
    private static final Logger logger = LoggerFactory.getLogger(CrawlerThread.class);

    private final XxlCrawler crawler;
    /** Reused for user-agent selection instead of allocating a new generator per request. */
    private final Random random = new Random();
    private volatile boolean running = true;
    private volatile boolean toStop = false;

    /**
     * @param xxlCrawler the crawler whose run configuration and URL pool this worker uses
     */
    public CrawlerThread(XxlCrawler xxlCrawler) {
        this.crawler = xxlCrawler;
    }

    /** Requests the worker loop to end; the flag is checked before each new URL is fetched. */
    public void toStop() {
        this.toStop = true;
    }

    /**
     * @return {@code false} while the worker is between URLs, {@code true} otherwise
     */
    public boolean isRunning() {
        return this.running;
    }

    /**
     * Main loop: fetch a URL, process it (with retries) and repeat until a stop is requested.
     * Every throwable raised by one iteration is logged and the loop continues.
     */
    @Override // java.lang.Runnable
    public void run() {
        while (!this.toStop) {
            try {
                // Mark this worker idle while it checks for completion and fetches the next URL.
                this.running = false;
                this.crawler.tryFinish();
                String url = this.crawler.getRunUrlPool().getUrl();
                this.running = true;

                if (UrlUtil.isUrl(url)) {
                    logger.info(">>>>>>>>>>> xxl crawler, process link : {}", url);
                    processWithRetry(url);
                }
            } catch (Throwable th) {
                if (th instanceof XxlCrawlerException) {
                    logger.error(">>>>>>>>>>> xxl crawler thread {}", th.getMessage(), th);
                } else {
                    logger.error(th.getMessage(), th);
                }
            }
        }
    }

    /**
     * Processes one URL, making up to {@code failRetryCount + 1} attempts. Each attempt builds a
     * request, invokes the parser's {@code preParse}, loads and parses the page, then invokes
     * {@code afterParse} or {@code afterParseFail}. The configured pause is applied after every
     * attempt. Retrying stops after the first successful response or when the pause is
     * interrupted.
     *
     * @param url the URL to process
     */
    private void processWithRetry(String url) {
        int maxAttempts = this.crawler.getRunConf().getFailRetryCount() + 1;
        for (int attempt = 0; attempt < maxAttempts; attempt++) {
            Response response = null;
            try {
                Request request = buildRequest(url);
                this.crawler.getRunConf().getPageParser().preParse(request);
                response = loadAndParsePage(request);
                if (response.isSuccess()) {
                    this.crawler.getRunConf().getPageParser().afterParse(response);
                } else {
                    this.crawler.getRunConf().getPageParser().afterParseFail(response);
                }
            } catch (Throwable th) {
                logger.error(">>>>>>>>>>> xxl crawler proocess error.", th);
            }

            if (!pauseAfterAttempt()) {
                // Interrupted: give up the remaining retries so the outer loop can re-check toStop.
                break;
            }
            if (response != null && response.isSuccess()) {
                break;
            }
        }
    }

    /**
     * Sleeps for the configured pause, if it is positive.
     *
     * @return {@code false} if the sleep was interrupted (the interrupt status is restored),
     *         {@code true} otherwise
     */
    private boolean pauseAfterAttempt() {
        long pauseMillis = this.crawler.getRunConf().getPauseMillis();
        if (pauseMillis <= 0) {
            return true;
        }
        try {
            TimeUnit.MILLISECONDS.sleep(pauseMillis);
            return true;
        } catch (InterruptedException e) {
            logger.error(">>>>>>>>>>> xxl crawler thread is interrupted. {}", e.getMessage());
            // Do not swallow the interrupt: keep it visible to the rest of the loop.
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Builds the request for a URL from the crawler's run configuration.
     *
     * @param url the URL to request
     * @return a request carrying the configured params, headers, cookies, referrer, method,
     *         timeout, TLS validation flag, a user agent (random when several are configured,
     *         {@code null} when none is) and a proxy from the proxy pool when one is configured
     */
    private Request buildRequest(String url) {
        List<String> userAgents = this.crawler.getRunConf().getUserAgentList();
        String userAgent = null;
        if (userAgents != null && !userAgents.isEmpty()) {
            userAgent = userAgents.size() == 1
                    ? userAgents.get(0)
                    : userAgents.get(this.random.nextInt(userAgents.size()));
        }

        Proxy proxy = null;
        if (this.crawler.getRunConf().getProxyPool() != null) {
            proxy = this.crawler.getRunConf().getProxyPool().getProxy();
        }

        Request request = new Request();
        request.setUrl(url);
        request.setParamMap(this.crawler.getRunConf().getParamMap());
        request.setHeaderMap(this.crawler.getRunConf().getHeaderMap());
        request.setCookieMap(this.crawler.getRunConf().getCookieMap());
        request.setUserAgent(userAgent);
        request.setReferrer(this.crawler.getRunConf().getReferrer());
        request.setIfPost(this.crawler.getRunConf().isIfPost());
        request.setTimeoutMillis(this.crawler.getRunConf().getTimeoutMillis());
        request.setValidateTLSCertificates(this.crawler.getRunConf().isValidateTLSCertificates());
        request.setProxy(proxy);
        return request;
    }

    /**
     * Loads the page for a request and maps it onto page-VO objects.
     *
     * <ul>
     *   <li>If the page loader throws or returns {@code null}, a failed response is returned.</li>
     *   <li>If spreading is allowed, every link found on the page is added to the URL pool.</li>
     *   <li>If the request URL does not pass the URL pool's regex check, a successful response
     *       without elements and with an empty VO list is returned.</li>
     *   <li>Otherwise one VO is created per element matched by the page-level CSS query.</li>
     * </ul>
     *
     * @param request the request to load
     * @return the response describing the outcome
     * @throws ReflectiveOperationException if a VO cannot be instantiated or a field cannot be set
     */
    private Response loadAndParsePage(Request request) throws ReflectiveOperationException {
        Document document = null;
        try {
            document = this.crawler.getRunConf().getPageLoader().load(request);
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }
        if (document == null) {
            return new Response(request, false, null, null, new ArrayList<Object>());
        }

        spreadLinks(document);

        if (!this.crawler.getRunUrlPool().validUrlRegex(request.getUrl())) {
            return new Response(request, true, document, null, new ArrayList<Object>());
        }

        Class<?> voClass = resolvePageVoClass();
        List<Element> matchedElements = new ArrayList<>();
        List<Object> pageVos = new ArrayList<>();

        Elements pageElements = document.select(resolvePageCssQuery(voClass));
        if (pageElements != null && !pageElements.isEmpty()) {
            // The declared fields do not change between elements, so look them up once.
            Field[] fields = voClass.getDeclaredFields();
            for (Element element : pageElements) {
                Object pageVo = voClass.getDeclaredConstructor().newInstance();
                for (Field field : fields) {
                    populateField(pageVo, field, element);
                }
                matchedElements.add(element);
                pageVos.add(pageVo);
            }
        }
        return new Response(request, true, document, matchedElements, pageVos);
    }

    /** Adds every link found on the document to the URL pool when spreading is allowed. */
    private void spreadLinks(Document document) {
        if (!this.crawler.getRunConf().isAllowSpread()) {
            return;
        }
        Set<String> links = JsoupUtil.findLinks(document);
        if (links == null) {
            return;
        }
        for (String link : links) {
            this.crawler.getRunUrlPool().addUrl(link, true);
        }
    }

    /**
     * Determines the page-VO class from the first type argument of the page parser's generic
     * superclass. A parameterized type argument resolves to its raw class; anything that cannot
     * be resolved to a class (no type argument, type variable, ...) resolves to {@code Object}.
     */
    private Class<?> resolvePageVoClass() {
        Type superType = this.crawler.getRunConf().getPageParser().getClass().getGenericSuperclass();
        if (superType instanceof ParameterizedType) {
            Type typeArgument = ((ParameterizedType) superType).getActualTypeArguments()[0];
            if (typeArgument instanceof Class) {
                return (Class<?>) typeArgument;
            }
            if (typeArgument instanceof ParameterizedType) {
                Type rawType = ((ParameterizedType) typeArgument).getRawType();
                if (rawType instanceof Class) {
                    return (Class<?>) rawType;
                }
            }
        }
        return Object.class;
    }

    /** Returns the {@link PageSelect} CSS query of the VO class, or {@code "html"} if absent or blank. */
    private static String resolvePageCssQuery(Class<?> voClass) {
        PageSelect pageSelect = voClass.getAnnotation(PageSelect.class);
        if (pageSelect == null || pageSelect.cssQuery() == null || pageSelect.cssQuery().trim().length() <= 0) {
            return "html";
        }
        return pageSelect.cssQuery();
    }

    /**
     * Fills one VO field from a page element. Static fields, fields without a
     * {@link PageFieldSelect} annotation and fields with a blank CSS query are skipped, as are
     * parameterized fields whose raw type is not {@link List}. The field is only written when a
     * non-null value was extracted.
     */
    private void populateField(Object pageVo, Field field, Element element) throws IllegalAccessException {
        if (Modifier.isStatic(field.getModifiers())) {
            return;
        }
        PageFieldSelect fieldSelect = field.getAnnotation(PageFieldSelect.class);
        if (fieldSelect == null) {
            return;
        }
        String cssQuery = fieldSelect.cssQuery();
        if (cssQuery == null || cssQuery.trim().isEmpty()) {
            return;
        }
        Const.SelectType selectType = fieldSelect.selectType();
        String selectVal = fieldSelect.selectVal();

        Object value = null;
        Type fieldType = field.getGenericType();
        if (!(fieldType instanceof ParameterizedType)) {
            value = parseSingleValue(field, element, cssQuery, selectType, selectVal);
        } else if (List.class.equals(((ParameterizedType) fieldType).getRawType())) {
            value = parseListValue(field, element, cssQuery, selectType, selectVal);
        }

        if (value != null) {
            field.setAccessible(true);
            field.set(pageVo, value);
        }
    }

    /**
     * Extracts the value for a non-parameterized field from the first element matching the CSS
     * query. Returns {@code null} when nothing matches, the extracted text is empty, or the text
     * cannot be converted (the conversion error is logged).
     */
    private static Object parseSingleValue(Field field, Element element, String cssQuery,
                                           Const.SelectType selectType, String selectVal) {
        Elements matches = element.select(cssQuery);
        if (matches == null || matches.isEmpty()) {
            return null;
        }
        String text = JsoupUtil.parseElement(matches.get(0), selectType, selectVal);
        if (text == null || text.length() == 0) {
            return null;
        }
        try {
            return FieldReflectionUtil.parseValue(field, text);
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
            return null;
        }
    }

    /**
     * Extracts the values for a {@link List} field from all elements matching the CSS query.
     * Elements with empty text or unconvertible text (logged) are skipped. Returns {@code null}
     * when no value was collected.
     */
    private static List<Object> parseListValue(Field field, Element element, String cssQuery,
                                               Const.SelectType selectType, String selectVal) {
        Elements matches = element.select(cssQuery);
        if (matches == null || matches.isEmpty()) {
            return null;
        }
        List<Object> values = new ArrayList<>();
        for (Element match : matches) {
            String text = JsoupUtil.parseElement(match, selectType, selectVal);
            if (text == null || text.length() == 0) {
                continue;
            }
            try {
                values.add(FieldReflectionUtil.parseValue(field, text));
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
        return values.isEmpty() ? null : values;
    }
}
