package net.dankito.readability4j.processor;

import com.gargoylesoftware.htmlunit.html.HtmlArticle;
import com.gargoylesoftware.htmlunit.html.HtmlBlockQuote;
import com.gargoylesoftware.htmlunit.html.HtmlBody;
import com.gargoylesoftware.htmlunit.html.HtmlButton;
import com.gargoylesoftware.htmlunit.html.HtmlCaption;
import com.gargoylesoftware.htmlunit.html.HtmlDefinitionList;
import com.gargoylesoftware.htmlunit.html.HtmlDirectory;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlEmbed;
import com.gargoylesoftware.htmlunit.html.HtmlFieldSet;
import com.gargoylesoftware.htmlunit.html.HtmlFooter;
import com.gargoylesoftware.htmlunit.html.HtmlForm;
import com.gargoylesoftware.htmlunit.html.HtmlFrame;
import com.gargoylesoftware.htmlunit.html.HtmlHeader;
import com.gargoylesoftware.htmlunit.html.HtmlHeading1;
import com.gargoylesoftware.htmlunit.html.HtmlHeading2;
import com.gargoylesoftware.htmlunit.html.HtmlHeading3;
import com.gargoylesoftware.htmlunit.html.HtmlHeading4;
import com.gargoylesoftware.htmlunit.html.HtmlHeading5;
import com.gargoylesoftware.htmlunit.html.HtmlHeading6;
import com.gargoylesoftware.htmlunit.html.HtmlHorizontalRule;
import com.gargoylesoftware.htmlunit.html.HtmlImage;
import com.gargoylesoftware.htmlunit.html.HtmlInlineFrame;
import com.gargoylesoftware.htmlunit.html.HtmlLink;
import com.gargoylesoftware.htmlunit.html.HtmlObject;
import com.gargoylesoftware.htmlunit.html.HtmlOrderedList;
import com.gargoylesoftware.htmlunit.html.HtmlParagraph;
import com.gargoylesoftware.htmlunit.html.HtmlPreformattedText;
import com.gargoylesoftware.htmlunit.html.HtmlSection;
import com.gargoylesoftware.htmlunit.html.HtmlSummary;
import com.gargoylesoftware.htmlunit.html.HtmlSvg;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableColumn;
import com.gargoylesoftware.htmlunit.html.HtmlTableDataCell;
import com.gargoylesoftware.htmlunit.html.HtmlTableFooter;
import com.gargoylesoftware.htmlunit.html.HtmlTableHeader;
import com.gargoylesoftware.htmlunit.html.HtmlTableHeaderCell;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
import com.gargoylesoftware.htmlunit.html.HtmlTextArea;
import com.gargoylesoftware.htmlunit.html.HtmlUnorderedList;
import com.intellij.uiDesigner.UIFormXmlConstants;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TypeCastException;
import kotlin.collections.CollectionsKt;
import kotlin.comparisons.ComparisonsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.text.Regex;
import kotlin.text.StringsKt;
import net.dankito.readability4j.model.ArticleGrabberOptions;
import net.dankito.readability4j.model.ArticleMetadata;
import net.dankito.readability4j.model.ReadabilityObject;
import net.dankito.readability4j.model.ReadabilityOptions;
import net.dankito.readability4j.util.RegExUtil;
import nl.siegmann.epublib.epub.PackageDocumentBase;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.apache.xalan.templates.Constants;
import org.apache.xmpbox.type.ThumbnailType;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.openqa.selenium.remote.DriverCommand;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* compiled from: ArticleGrabber.kt */
@Metadata(mv = {1, 1, 8}, bv = {1, 0, 2}, k = 1, d1 = {"��\u0094\u0001\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0010\u000e\n\u0002\b\t\n\u0002\u0010\b\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000b\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\t\n\u0002\u0010\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0010\f\n\u0002\b\u0002\n\u0002\u0010\u0006\n\u0002\b\u0004\n\u0002\u0010 \n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0018\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\b\u0015\b\u0016\u0018�� g2\u00020\u0001:\u0001gB\u0017\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0004\u001a\u00020\u0005¢\u0006\u0002\u0010\u0006J\u0018\u0010%\u001a\u00020\u001a2\u0006\u0010&\u001a\u00020\u00192\u0006\u0010'\u001a\u00020\bH\u0014J\u0018\u0010(\u001a\u00020)2\u0006\u0010*\u001a\u00020\u00192\u0006\u0010+\u001a\u00020\bH\u0014J \u0010,\u001a\u00020)2\u0006\u0010*\u001a\u00020\u00192\u0006\u0010+\u001a\u00020\b2\u0006\u0010\u0002\u001a\u00020-H\u0014J\u0018\u0010.\u001a\u00020)2\u0006\u0010*\u001a\u00020\u00192\u0006\u0010\u0002\u001a\u00020-H\u0014J\u0018\u0010/\u001a\u00020)2\u0006\u0010*\u001a\u00020\u00192\u0006\u00100\u001a\u000201H\u0014J\u0010\u00102\u001a\u00020)2\u0006\u0010*\u001a\u00020\u0019H\u0014J \u00103\u001a\u00020\u00192\u0006\u00104\u001a\u0002052\u0006\u00106\u001a\u00020\u00192\u0006\u00107\u001a\u00020\u001aH\u0014J\u001a\u00108\u001a\u00020\u00122\u0006\u0010&\u001a\u00020\u00192\b\b\u0002\u00109\u001a\u00020:H\u0014J\u0018\u0010;\u001a\u00020\u00122\u0006\u0010*\u001a\u00020\u00192\u0006\u0010\u0002\u001a\u00020-H\u0014J\u0010\u0010<\u001a\u00020=2\u0006\u0010>\u001a\u00020\u0019H\u0014J\u001c\u0010?\u001a\u0004\u0018\u00010\u00192\u0006\u0010&\u001a\u00020\u00192\b\b\u0002\u0010@\u001a\u00020\u001aH\u0014J 
\u0010A\u001a\b\u0012\u0004\u0012\u00020\u00190B2\u0006\u0010&\u001a\u00020\u00192\b\b\u0002\u0010C\u001a\u00020\u0012H\u0014J\u0010\u0010\u001c\u001a\u00020\u001a2\u0006\u0010D\u001a\u00020\u0019H\u0014J\u0012\u0010E\u001a\u0004\u0018\u00010\u001f2\u0006\u0010>\u001a\u00020\u0019H\u0014J\u001c\u0010F\u001a\u000e\u0012\u0004\u0012\u00020\u0012\u0012\u0004\u0012\u00020\u00120G2\u0006\u0010D\u001a\u00020\u0019H\u0014J\u0018\u0010H\u001a\u00020)2\u0006\u00106\u001a\u00020\u00192\u0006\u00104\u001a\u000205H\u0014J2\u0010I\u001a\u000e\u0012\u0004\u0012\u00020\u0019\u0012\u0004\u0012\u00020\u001a0G2\u0006\u0010J\u001a\u00020\u00192\f\u0010K\u001a\b\u0012\u0004\u0012\u00020\u00190B2\u0006\u0010\u0002\u001a\u00020-H\u0014J0\u0010L\u001a\u0004\u0018\u00010\u00192\u0006\u00104\u001a\u0002052\u0006\u0010M\u001a\u00020N2\b\b\u0002\u0010\u0002\u001a\u00020-2\n\b\u0002\u0010O\u001a\u0004\u0018\u00010\u0019H\u0016J:\u0010P\u001a\u00020\u001a2\u0006\u0010&\u001a\u00020\u00192\u0006\u0010Q\u001a\u00020\b2\b\b\u0002\u0010C\u001a\u00020\u00122\u0016\b\u0002\u0010R\u001a\u0010\u0012\u0004\u0012\u00020\u0019\u0012\u0004\u0012\u00020\u001a\u0018\u00010SH\u0014J\u0010\u0010T\u001a\u00020\u001a2\u0006\u0010>\u001a\u00020\u0019H\u0014J\u0010\u0010U\u001a\u00020\u001a2\u0006\u0010>\u001a\u00020\u0019H\u0014J\u0018\u0010V\u001a\u00020\u001f2\u0006\u0010&\u001a\u00020\u00192\u0006\u0010\u0002\u001a\u00020-H\u0014J\u0010\u0010W\u001a\u00020\u001a2\u0006\u0010&\u001a\u00020\u0019H\u0014J\u0010\u0010X\u001a\u00020\u001a2\u0006\u0010Y\u001a\u00020\bH\u0014J\u0010\u0010Z\u001a\u00020)2\u0006\u0010[\u001a\u00020\u0019H\u0014J 
\u0010\\\u001a\u00020)2\u0006\u0010]\u001a\u00020\u00192\u0006\u0010\u0002\u001a\u00020-2\u0006\u0010M\u001a\u00020NH\u0014J\u001e\u0010^\u001a\b\u0012\u0004\u0012\u00020\u00190B2\u0006\u00104\u001a\u0002052\u0006\u0010\u0002\u001a\u00020-H\u0014J\u001c\u0010_\u001a\u0004\u0018\u00010\u00192\u0006\u0010&\u001a\u00020\u00192\b\b\u0002\u0010`\u001a\u00020\bH\u0014J$\u0010a\u001a\b\u0012\u0004\u0012\u00020\u00190B2\f\u0010b\u001a\b\u0012\u0004\u0012\u00020\u00190B2\u0006\u0010\u0002\u001a\u00020-H\u0014J\u0018\u0010c\u001a\u00020)2\u0006\u0010&\u001a\u00020\u00192\u0006\u0010Q\u001a\u00020\bH\u0014J\u0018\u0010d\u001a\u00020)2\u0006\u0010D\u001a\u00020\u00192\u0006\u0010\u0017\u001a\u00020\u001aH\u0014J\u0010\u0010e\u001a\u00020\u001a2\u0006\u0010f\u001a\u00020\u0019H\u0014R(\u0010\t\u001a\u0004\u0018\u00010\b2\b\u0010\u0007\u001a\u0004\u0018\u00010\b@DX\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\n\u0010\u000b\"\u0004\b\f\u0010\rR(\u0010\u000e\u001a\u0004\u0018\u00010\b2\b\u0010\u0007\u001a\u0004\u0018\u00010\b@DX\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\u000f\u0010\u000b\"\u0004\b\u0010\u0010\rR\u0014\u0010\u0011\u001a\u00020\u0012X\u0084\u0004¢\u0006\b\n��\u001a\u0004\b\u0013\u0010\u0014R\u0014\u0010\u0002\u001a\u00020\u0003X\u0084\u0004¢\u0006\b\n��\u001a\u0004\b\u0015\u0010\u0016R0\u0010\u0017\u001a\u001e\u0012\u0004\u0012\u00020\u0019\u0012\u0004\u0012\u00020\u001a0\u0018j\u000e\u0012\u0004\u0012\u00020\u0019\u0012\u0004\u0012\u00020\u001a`\u001bX\u0084\u0004¢\u0006\b\n��\u001a\u0004\b\u001c\u0010\u001dR0\u0010\u001e\u001a\u001e\u0012\u0004\u0012\u00020\u0019\u0012\u0004\u0012\u00020\u001f0\u0018j\u000e\u0012\u0004\u0012\u00020\u0019\u0012\u0004\u0012\u00020\u001f`\u001bX\u0084\u0004¢\u0006\b\n��\u001a\u0004\b \u0010\u001dR\u0014\u0010\u0004\u001a\u00020\u0005X\u0084\u0004¢\u0006\b\n��\u001a\u0004\b!\u0010\"R\u0014\u0010#\u001a\u00020\u0012X\u0084\u0004¢\u0006\b\n��\u001a\u0004\b$\u0010\u0014¨\u0006h"}, d2 = 
{"Lnet/dankito/readability4j/processor/ArticleGrabber;", "Lnet/dankito/readability4j/processor/ProcessorBase;", "options", "Lnet/dankito/readability4j/model/ReadabilityOptions;", "regEx", "Lnet/dankito/readability4j/util/RegExUtil;", "(Lnet/dankito/readability4j/model/ReadabilityOptions;Lnet/dankito/readability4j/util/RegExUtil;)V", "<set-?>", "", "articleByline", "getArticleByline", "()Ljava/lang/String;", "setArticleByline", "(Ljava/lang/String;)V", "articleDir", "getArticleDir", "setArticleDir", "nbTopCandidates", "", "getNbTopCandidates", "()I", "getOptions", "()Lnet/dankito/readability4j/model/ReadabilityOptions;", "readabilityDataTable", "Ljava/util/HashMap;", "Lorg/jsoup/nodes/Element;", "", "Lkotlin/collections/HashMap;", "getReadabilityDataTable", "()Ljava/util/HashMap;", "readabilityObjects", "Lnet/dankito/readability4j/model/ReadabilityObject;", "getReadabilityObjects", "getRegEx", "()Lnet/dankito/readability4j/util/RegExUtil;", "wordThreshold", "getWordThreshold", "checkByline", "node", "matchString", "clean", "", "e", "tag", "cleanConditionally", "Lnet/dankito/readability4j/model/ArticleGrabberOptions;", "cleanHeaders", "cleanMatchedNodes", "regex", "Lkotlin/text/Regex;", "cleanStyles", "createArticleContent", "doc", "Lorg/jsoup/nodes/Document;", "topCandidate", "isPaging", "getCharCount", OperatorName.CURVE_TO, "", "getClassWeight", "getLinkDensity", "", "element", "getNextNode", "ignoreSelfAndKids", "getNodeAncestors", "", "maxDepth", HtmlTable.TAG_NAME, "getReadabilityObject", "getRowAndColumnCount", "Lkotlin/Pair;", "getTextDirection", "getTopCandidate", "page", "candidates", "grabArticle", "metadata", "Lnet/dankito/readability4j/model/ArticleMetadata;", "pageElement", "hasAncestorTag", "tagName", "filterFn", "Lkotlin/Function1;", "hasChildBlockElement", "hasSinglePInsideElement", "initializeNode", "isElementWithoutContent", "isValidByline", "text", "markDataTables", Constants.ELEMNAME_ROOT_STRING, "prepArticle", "articleContent", "prepareNodes", 
"removeAndGetNext", "reason", "scoreElements", "elementsToScore", "setNodeTag", "setReadabilityDataTable", "shouldKeepSibling", "sibling", "Companion", "Readability4J"})
/* loaded from: input_file:net/dankito/readability4j/processor/ArticleGrabber.class */
public class ArticleGrabber extends ProcessorBase {

    // Byline text captured by checkByline(); stays null until a valid byline is found.
    @Nullable
    private String articleByline;

    // Exposed through getArticleDir()/setArticleDir(); not assigned in the visible code.
    // NOTE(review): presumably the article's text direction — confirm against getTextDirection().
    @Nullable
    private String articleDir;
    // NOTE(review): presumably the number of top candidates considered by getTopCandidate();
    // it is initialised outside the visible portion of the file — confirm.
    private final int nbTopCandidates;
    // grabArticle() retries extraction while the extracted text is shorter than this value.
    // Despite the name, it is compared against a character count (String.length()).
    private final int wordThreshold;

    // Per-element ReadabilityObject instances.
    // NOTE(review): presumably read through getReadabilityObject() — confirm.
    @NotNull
    private final HashMap<Element, ReadabilityObject> readabilityObjects;

    // Per-element boolean flag.
    // NOTE(review): presumably maintained by markDataTables()/setReadabilityDataTable() — confirm.
    @NotNull
    private final HashMap<Element, Boolean> readabilityDataTable;

    // Options object handed to this grabber; not read in the visible methods.
    @NotNull
    private final ReadabilityOptions options;

    // Regular-expression helper used for byline, unlikely-candidate and content checks.
    @NotNull
    private final RegExUtil regEx;
    // Single instance of the holder class that exposes the static lists below.
    public static final Companion Companion = new Companion(null);
    // NOTE(review): the tag/attribute names below are spelled through constants of unrelated
    // libraries (HtmlUnit, IntelliJ UI designer); presumably the decompiler substituted them
    // for plain string literals with the same value — verify before relying on them.
    // Tags that prepareNodes() collects as elements to score.
    private static final List<String> DEFAULT_TAGS_TO_SCORE = Arrays.asList(HtmlSection.TAG_NAME, HtmlHeading2.TAG_NAME, HtmlHeading3.TAG_NAME, HtmlHeading4.TAG_NAME, HtmlHeading5.TAG_NAME, HtmlHeading6.TAG_NAME, HtmlParagraph.TAG_NAME, HtmlTableDataCell.TAG_NAME, HtmlPreformattedText.TAG_NAME);
    // Tags that hasChildBlockElement() treats as block-level descendants of a div.
    private static final List<String> DIV_TO_P_ELEMS = Arrays.asList("a", HtmlBlockQuote.TAG_NAME, HtmlDefinitionList.TAG_NAME, HtmlDivision.TAG_NAME, HtmlImage.TAG_NAME, HtmlOrderedList.TAG_NAME, HtmlParagraph.TAG_NAME, HtmlPreformattedText.TAG_NAME, HtmlTable.TAG_NAME, HtmlUnorderedList.TAG_NAME, "select");
    // The remaining lists are not referenced in the visible part of the class; their names
    // describe their intended role, their consumers live further down the file.
    private static final List<String> ALTER_TO_DIV_EXCEPTIONS = Arrays.asList(HtmlDivision.TAG_NAME, HtmlArticle.TAG_NAME, HtmlSection.TAG_NAME, HtmlParagraph.TAG_NAME);
    private static final List<String> PRESENTATIONAL_ATTRIBUTES = Arrays.asList("align", "background", "bgcolor", UIFormXmlConstants.ELEMENT_BORDER, "cellpadding", "cellspacing", HtmlFrame.TAG_NAME, "hspace", "rules", "style", "valign", "vspace");
    private static final List<String> DEPRECATED_SIZE_ATTRIBUTE_ELEMS = Arrays.asList(HtmlTable.TAG_NAME, HtmlTableHeaderCell.TAG_NAME, HtmlTableDataCell.TAG_NAME, HtmlHorizontalRule.TAG_NAME, HtmlPreformattedText.TAG_NAME);
    private static final List<String> EMBEDDED_NODES = Arrays.asList(HtmlObject.TAG_NAME, HtmlEmbed.TAG_NAME, HtmlInlineFrame.TAG_NAME);
    private static final List<String> DATA_TABLE_DESCENDANTS = Arrays.asList(HtmlTableColumn.TAG_NAME, "colgroup", HtmlTableFooter.TAG_NAME, HtmlTableHeader.TAG_NAME, HtmlTableHeaderCell.TAG_NAME);
    // Class-wide SLF4J logger, reached through Companion.getLog().
    private static final Logger log = LoggerFactory.getLogger((Class<?>) ArticleGrabber.class);

    /* compiled from: ArticleGrabber.kt */
    @Metadata(mv = {1, 1, 8}, bv = {1, 0, 2}, k = 1, d1 = {"��&\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0010!\n\u0002\u0010\u000e\n��\n\u0002\u0010 \n\u0002\b\u000f\n\u0002\u0018\u0002\n\u0002\b\u0003\b\u0086\u0003\u0018��2\u00020\u0001B\u0007\b\u0002¢\u0006\u0002\u0010\u0002R5\u0010\u0003\u001a&\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005 \u0006*\u0012\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005\u0018\u00010\u00070\u0004¢\u0006\b\n��\u001a\u0004\b\b\u0010\tR5\u0010\n\u001a&\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005 \u0006*\u0012\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005\u0018\u00010\u00070\u0004¢\u0006\b\n��\u001a\u0004\b\u000b\u0010\tR5\u0010\f\u001a&\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005 \u0006*\u0012\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005\u0018\u00010\u00070\u0004¢\u0006\b\n��\u001a\u0004\b\r\u0010\tR5\u0010\u000e\u001a&\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005 \u0006*\u0012\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005\u0018\u00010\u00070\u0004¢\u0006\b\n��\u001a\u0004\b\u000f\u0010\tR5\u0010\u0010\u001a&\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005 \u0006*\u0012\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005\u0018\u00010\u00070\u0004¢\u0006\b\n��\u001a\u0004\b\u0011\u0010\tR5\u0010\u0012\u001a&\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005 \u0006*\u0012\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005\u0018\u00010\u00070\u0004¢\u0006\b\n��\u001a\u0004\b\u0013\u0010\tR5\u0010\u0014\u001a&\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005 \u0006*\u0012\u0012\f\u0012\n \u0006*\u0004\u0018\u00010\u00050\u0005\u0018\u00010\u00070\u0004¢\u0006\b\n��\u001a\u0004\b\u0015\u0010\tR\u001c\u0010\u0016\u001a\n \u0006*\u0004\u0018\u00010\u00170\u0017X\u0082\u0004¢\u0006\b\n��\u001a\u0004\b\u0018\u0010\u0019¨\u0006\u001a"}, d2 = {"Lnet/dankito/readability4j/processor/ArticleGrabber$Companion;", "", 
"()V", "ALTER_TO_DIV_EXCEPTIONS", "", "", "kotlin.jvm.PlatformType", "", "getALTER_TO_DIV_EXCEPTIONS", "()Ljava/util/List;", "DATA_TABLE_DESCENDANTS", "getDATA_TABLE_DESCENDANTS", "DEFAULT_TAGS_TO_SCORE", "getDEFAULT_TAGS_TO_SCORE", "DEPRECATED_SIZE_ATTRIBUTE_ELEMS", "getDEPRECATED_SIZE_ATTRIBUTE_ELEMS", "DIV_TO_P_ELEMS", "getDIV_TO_P_ELEMS", "EMBEDDED_NODES", "getEMBEDDED_NODES", "PRESENTATIONAL_ATTRIBUTES", "getPRESENTATIONAL_ATTRIBUTES", "log", "Lorg/slf4j/Logger;", DriverCommand.GET_LOG, "()Lorg/slf4j/Logger;", "Readability4J"})
    /* loaded from: input_file:net/dankito/readability4j/processor/ArticleGrabber$Companion.class */
    public static final class Companion {
        // Holder for the static members of ArticleGrabber. Every accessor simply
        // hands back the matching static field of the enclosing class; the lists
        // are returned as-is, without copying.

        private Companion() {
        }

        /** Synthetic constructor; the marker argument is ignored. */
        public /* synthetic */ Companion(DefaultConstructorMarker defaultConstructorMarker) {
            this();
        }

        /** @return the tag names collected for scoring */
        public final List<String> getDEFAULT_TAGS_TO_SCORE() {
            return DEFAULT_TAGS_TO_SCORE;
        }

        /** @return the tag names treated as block-level children of a div */
        public final List<String> getDIV_TO_P_ELEMS() {
            return DIV_TO_P_ELEMS;
        }

        /** @return the static {@code ALTER_TO_DIV_EXCEPTIONS} list */
        public final List<String> getALTER_TO_DIV_EXCEPTIONS() {
            return ALTER_TO_DIV_EXCEPTIONS;
        }

        /** @return the static {@code PRESENTATIONAL_ATTRIBUTES} list */
        public final List<String> getPRESENTATIONAL_ATTRIBUTES() {
            return PRESENTATIONAL_ATTRIBUTES;
        }

        /** @return the static {@code DEPRECATED_SIZE_ATTRIBUTE_ELEMS} list */
        public final List<String> getDEPRECATED_SIZE_ATTRIBUTE_ELEMS() {
            return DEPRECATED_SIZE_ATTRIBUTE_ELEMS;
        }

        /** @return the static {@code EMBEDDED_NODES} list */
        public final List<String> getEMBEDDED_NODES() {
            return EMBEDDED_NODES;
        }

        /** @return the static {@code DATA_TABLE_DESCENDANTS} list */
        public final List<String> getDATA_TABLE_DESCENDANTS() {
            return DATA_TABLE_DESCENDANTS;
        }

        /* JADX INFO: Access modifiers changed from: private */
        /** @return the logger shared by all ArticleGrabber instances */
        public final Logger getLog() {
            return log;
        }
    }

    /**
     * Returns the byline recorded by {@code checkByline}.
     *
     * @return the stored byline, or {@code null} when none has been recorded
     */
    @Nullable
    public final String getArticleByline() {
        return articleByline;
    }

    /**
     * Replaces the stored byline.
     *
     * @param str the new byline; {@code null} clears it
     */
    protected final void setArticleByline(@Nullable String str) {
        articleByline = str;
    }

    /**
     * Returns the stored {@code articleDir} value.
     *
     * @return the stored value, or {@code null} when it has not been set
     */
    @Nullable
    public final String getArticleDir() {
        return articleDir;
    }

    /**
     * Replaces the stored {@code articleDir} value.
     *
     * @param str the new value; {@code null} clears it
     */
    protected final void setArticleDir(@Nullable String str) {
        articleDir = str;
    }

    /** @return the configured {@code nbTopCandidates} value */
    protected final int getNbTopCandidates() {
        return nbTopCandidates;
    }

    /** @return the minimum extracted text length that {@code grabArticle} accepts */
    protected final int getWordThreshold() {
        return wordThreshold;
    }

    /**
     * Returns the live element-to-ReadabilityObject map (no copy is made).
     *
     * @return the backing map, never {@code null}
     */
    @NotNull
    protected final HashMap<Element, ReadabilityObject> getReadabilityObjects() {
        return readabilityObjects;
    }

    /**
     * Returns the live element-to-flag map (no copy is made).
     *
     * @return the backing map, never {@code null}
     */
    @NotNull
    protected final HashMap<Element, Boolean> getReadabilityDataTable() {
        return readabilityDataTable;
    }

    /**
     * Extracts the article content from {@code doc}.
     *
     * <p>The extraction pipeline (prepare nodes, score, pick the top candidate,
     * build and clean the article) is run repeatedly. Whenever the extracted text
     * is shorter than {@code wordThreshold} characters, the page markup is restored
     * and one more flag of {@code options} is switched off, in this order:
     * {@code stripUnlikelyCandidates}, {@code weightClasses},
     * {@code cleanConditionally}. Once every flag is off, the longest attempt made
     * so far is returned.
     *
     * <p>Note that {@code options} is mutated by the retries.
     *
     * @param doc      document to extract from; modified in place
     * @param metadata article metadata passed on to {@code prepArticle}
     * @param options  extraction flags; flags are cleared as retries happen
     * @param element  page element to work on, or {@code null} to use the document body
     * @return the element holding the article content, or {@code null} when the
     *         document has no body or no attempt produced any text
     */
    @Nullable
    public Element grabArticle(@NotNull Document doc, @NotNull ArticleMetadata metadata, @NotNull ArticleGrabberOptions options, @Nullable Element element) {
        Intrinsics.checkParameterIsNotNull(doc, "doc");
        Intrinsics.checkParameterIsNotNull(metadata, "metadata");
        Intrinsics.checkParameterIsNotNull(options, "options");
        Companion.getLog().debug("**** grabArticle ****");

        boolean isPaging = element != null;
        Element page = isPaging ? element : doc.body();
        if (page == null) {
            Companion.getLog().debug("No body found in document. Abort.");
            return null;
        }

        // Snapshot of the page's own inner markup. It is written back with
        // page.html(...) before a retry, so it must be the page's inner HTML and
        // not the markup of the whole document (which would inject the document's
        // <head> content into the page element).
        String pageCacheHtml = page.html();

        // Must live outside the loop: the final fallback chooses among ALL attempts.
        List<Pair<Element, Integer>> attempts = new ArrayList<Pair<Element, Integer>>();

        while (true) {
            Pair<Element, Boolean> topCandidateResult = getTopCandidate(page, scoreElements(prepareNodes(doc, options), options), options);
            Element topCandidate = topCandidateResult.getFirst();
            // When true the top candidate itself is tagged as the page container;
            // otherwise the article's children are wrapped in a fresh div below.
            boolean tagTopCandidateAsPage = topCandidateResult.getSecond().booleanValue();

            Element articleContent = createArticleContent(doc, topCandidate, isPaging);
            Companion.getLog().debug("Article content pre-prep: {}", articleContent.html());
            prepArticle(articleContent, options, metadata);
            Companion.getLog().debug("Article content post-prep: {}", articleContent.html());

            if (tagTopCandidateAsPage) {
                topCandidate.attr("id", "readability-page-1");
                topCandidate.addClass("page");
            } else {
                Element pageDiv = doc.createElement(HtmlDivision.TAG_NAME);
                pageDiv.attr("id", "readability-page-1");
                pageDiv.addClass("page");
                // Iterate over a copy: moving a child mutates the live child list.
                for (Node child : new ArrayList<Node>(articleContent.childNodes())) {
                    child.remove();
                    pageDiv.appendChild(child);
                }
                articleContent.appendChild(pageDiv);
            }
            Companion.getLog().debug("Article content after paging: {}", articleContent.html());

            int textLength = getInnerText(articleContent, this.regEx, true).length();
            if (textLength >= this.wordThreshold) {
                getTextDirection(topCandidate, doc);
                return articleContent;
            }

            // Not enough text: undo the modifications and remember this attempt.
            page.html(pageCacheHtml);
            attempts.add(new Pair<Element, Integer>(articleContent, Integer.valueOf(textLength)));

            if (options.getStripUnlikelyCandidates()) {
                options.setStripUnlikelyCandidates(false);
            } else if (options.getWeightClasses()) {
                options.setWeightClasses(false);
            } else if (options.getCleanConditionally()) {
                options.setCleanConditionally(false);
            } else {
                // Every flag is exhausted: fall back to the attempt with the most text
                // (the earliest one wins on ties).
                Pair<Element, Integer> best = attempts.get(0);
                for (Pair<Element, Integer> attempt : attempts) {
                    if (attempt.getSecond().intValue() > best.getSecond().intValue()) {
                        best = attempt;
                    }
                }
                if (best.getSecond().intValue() <= 0) {
                    // No attempt produced any text at all.
                    return null;
                }
                getTextDirection(topCandidate, doc);
                return best.getFirst();
            }
        }
    }

    /**
     * Synthetic entry point for calls to {@code grabArticle} that omit trailing arguments.
     *
     * @param i   bit mask of omitted arguments: bit 4 replaces the options with a
     *            freshly constructed default instance, bit 8 replaces the element with {@code null}
     * @param obj must be {@code null}; any other value is rejected
     * @throws UnsupportedOperationException if {@code obj} is not {@code null}
     */
    @Nullable
    public static /* bridge */ /* synthetic */ Element grabArticle$default(ArticleGrabber articleGrabber, Document document, ArticleMetadata articleMetadata, ArticleGrabberOptions articleGrabberOptions, Element element, int i, Object obj) {
        if (obj != null) {
            throw new UnsupportedOperationException("Super calls with default arguments not supported in this target, function: grabArticle");
        }
        boolean optionsOmitted = (i & 4) != 0;
        boolean elementOmitted = (i & 8) != 0;
        ArticleGrabberOptions effectiveOptions = optionsOmitted
                ? new ArticleGrabberOptions(false, false, false, 7, null)
                : articleGrabberOptions;
        Element effectiveElement = elementOmitted ? null : element;
        return articleGrabber.grabArticle(document, articleMetadata, effectiveOptions, effectiveElement);
    }

    /**
     * Walks the document, removes nodes that should not take part in scoring and
     * collects the elements that should be scored.
     *
     * <p>For every visited element, in this order:
     * <ul>
     *   <li>a byline (see {@code checkByline}) is removed;</li>
     *   <li>when {@code options.getStripUnlikelyCandidates()} is set, an element whose
     *       class/id string matches the unlikely-candidate expression (and not the
     *       "maybe a candidate" one) is removed, unless it is the body or an anchor;</li>
     *   <li>an empty div/section/header/h1-h6 is removed;</li>
     *   <li>an element whose tag is in {@code DEFAULT_TAGS_TO_SCORE} is collected;</li>
     *   <li>a div is replaced by its only paragraph child, or gets its loose text
     *       wrapped in paragraphs when it has block-level descendants, or is renamed
     *       to a paragraph otherwise.</li>
     * </ul>
     *
     * @param doc     document to walk; modified in place
     * @param options extraction flags
     * @return the elements to score, in document order
     */
    @NotNull
    protected List<Element> prepareNodes(@NotNull Document doc, @NotNull ArticleGrabberOptions options) {
        Intrinsics.checkParameterIsNotNull(doc, "doc");
        Intrinsics.checkParameterIsNotNull(options, "options");
        List<Element> elementsToScore = new ArrayList<Element>();

        // The cursor is typed as Element: the traversal helpers return plain elements.
        Element node = doc;
        while (node != null) {
            String matchString = node.className() + " " + node.id();

            if (checkByline(node, matchString)) {
                node = removeAndGetNext(node, "byline");
                continue;
            }

            String tagName = node.tagName();

            if (options.getStripUnlikelyCandidates()
                    && this.regEx.isUnlikelyCandidate(matchString)
                    && !this.regEx.okMaybeItsACandidate(matchString)
                    && !Intrinsics.areEqual(tagName, HtmlBody.TAG_NAME)
                    && !Intrinsics.areEqual(tagName, "a")) {
                node = removeAndGetNext(node, "Removing unlikely candidate");
                continue;
            }

            boolean isContainerOrHeading = Intrinsics.areEqual(tagName, HtmlDivision.TAG_NAME)
                    || Intrinsics.areEqual(tagName, HtmlSection.TAG_NAME)
                    || Intrinsics.areEqual(tagName, HtmlHeader.TAG_NAME)
                    || Intrinsics.areEqual(tagName, HtmlHeading1.TAG_NAME)
                    || Intrinsics.areEqual(tagName, HtmlHeading2.TAG_NAME)
                    || Intrinsics.areEqual(tagName, HtmlHeading3.TAG_NAME)
                    || Intrinsics.areEqual(tagName, HtmlHeading4.TAG_NAME)
                    || Intrinsics.areEqual(tagName, HtmlHeading5.TAG_NAME)
                    || Intrinsics.areEqual(tagName, HtmlHeading6.TAG_NAME);
            if (isContainerOrHeading && isElementWithoutContent(node)) {
                node = removeAndGetNext(node, "node without content");
                continue;
            }

            if (Companion.getDEFAULT_TAGS_TO_SCORE().contains(tagName)) {
                elementsToScore.add(node);
            }

            if (Intrinsics.areEqual(tagName, HtmlDivision.TAG_NAME)) {
                if (hasSinglePInsideElement(node)) {
                    // The div only wraps a paragraph: continue with the paragraph itself.
                    Element onlyChild = node.child(0);
                    node.replaceWith(onlyChild);
                    node = onlyChild;
                    elementsToScore.add(node);
                } else if (hasChildBlockElement(node)) {
                    // Wrap loose, non-blank text in inline-styled paragraphs.
                    // Iterate over a copy so replacing a child cannot disturb the iteration.
                    for (Node child : new ArrayList<Node>(node.childNodes())) {
                        if (!(child instanceof TextNode)) {
                            continue;
                        }
                        TextNode textNode = (TextNode) child;
                        String text = textNode.text();
                        if (text == null) {
                            throw new TypeCastException("null cannot be cast to non-null type kotlin.CharSequence");
                        }
                        if (StringsKt.trim((CharSequence) text).toString().length() > 0) {
                            Element paragraph = doc.createElement(HtmlParagraph.TAG_NAME);
                            paragraph.text(textNode.text());
                            paragraph.attr("style", "display: inline;");
                            paragraph.addClass("readability-styled");
                            child.replaceWith(paragraph);
                        }
                    }
                } else {
                    // No block-level descendants: the div is effectively a paragraph.
                    setNodeTag(node, HtmlParagraph.TAG_NAME);
                    elementsToScore.add(node);
                }
            }

            node = getNextNode$default(this, node, false, 2, null);
        }
        return elementsToScore;
    }

    /**
     * Records the byline of the article if {@code node} holds one.
     *
     * <p>Nothing happens once a byline is stored. Otherwise the node qualifies when
     * its {@code rel} attribute equals {@code "author"} or {@code matchString}
     * matches the byline expression, and its whole text passes {@code isValidByline}.
     * The trimmed text of a qualifying node is stored as the byline.
     *
     * @param node        element to inspect
     * @param matchString class/id string of the element
     * @return {@code true} if the byline was recorded by this call
     */
    protected boolean checkByline(@NotNull Element node, @NotNull String matchString) {
        Intrinsics.checkParameterIsNotNull(node, "node");
        Intrinsics.checkParameterIsNotNull(matchString, "matchString");
        if (this.articleByline == null) {
            boolean looksLikeByline = Intrinsics.areEqual(node.attr("rel"), "author") || this.regEx.isByline(matchString);
            if (looksLikeByline) {
                String wholeText = node.wholeText();
                Intrinsics.checkExpressionValueIsNotNull(wholeText, "node.wholeText()");
                if (isValidByline(wholeText)) {
                    String text = node.text();
                    if (text == null) {
                        throw new TypeCastException("null cannot be cast to non-null type kotlin.CharSequence");
                    }
                    this.articleByline = StringsKt.trim((CharSequence) text).toString();
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Tells whether {@code text} is acceptable as a byline.
     *
     * @param text candidate byline
     * @return {@code true} if the trimmed text has between 1 and 99 characters
     */
    protected boolean isValidByline(@NotNull String text) {
        Intrinsics.checkParameterIsNotNull(text, "text");
        int trimmedLength = StringsKt.trim((CharSequence) text).toString().length();
        return trimmedLength > 0 && trimmedLength < 100;
    }

    /**
     * Tells whether {@code node} carries no content.
     *
     * @param node element to inspect
     * @return {@code true} if the element's text is blank and it either has no child
     *         elements or as many child elements as it has {@code br} plus {@code hr}
     *         elements (as counted by {@code getElementsByTag})
     */
    protected boolean isElementWithoutContent(@NotNull Element node) {
        Intrinsics.checkParameterIsNotNull(node, "node");
        if (!StringsKt.isBlank(node.text())) {
            return false;
        }
        int childCount = node.children().size();
        if (childCount == 0) {
            return true;
        }
        int lineBreaks = node.getElementsByTag("br").size();
        int horizontalRules = node.getElementsByTag(HtmlHorizontalRule.TAG_NAME).size();
        return childCount == lineBreaks + horizontalRules;
    }

    /**
     * Tells whether {@code element} is merely a wrapper around one paragraph.
     *
     * @param element element to inspect
     * @return {@code true} if the element has exactly one child element, that child
     *         is a paragraph, and none of the element's direct text nodes has content
     */
    protected boolean hasSinglePInsideElement(@NotNull Element element) {
        Intrinsics.checkParameterIsNotNull(element, "element");
        boolean onlyChildIsParagraph = element.children().size() == 1
                && Intrinsics.areEqual(element.child(0).tagName(), HtmlParagraph.TAG_NAME);
        if (!onlyChildIsParagraph) {
            return false;
        }
        for (Node child : element.childNodes()) {
            if (!(child instanceof TextNode)) {
                continue;
            }
            String text = ((TextNode) child).text();
            Intrinsics.checkExpressionValueIsNotNull(text, "node.text()");
            if (this.regEx.hasContent(text)) {
                // Loose text next to the paragraph: the element is more than a wrapper.
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether any descendant of {@code element} is a block-level element,
     * i.e. has a tag listed in {@code DIV_TO_P_ELEMS}. Descendants are searched
     * depth-first.
     *
     * @param element element whose descendants are inspected
     * @return {@code true} as soon as a block-level descendant is found
     */
    protected boolean hasChildBlockElement(@NotNull Element element) {
        Intrinsics.checkParameterIsNotNull(element, "element");
        List<String> blockTags = Companion.getDIV_TO_P_ELEMS();
        for (Element child : element.children()) {
            boolean containsBlock = blockTags.contains(child.tagName());
            if (!containsBlock) {
                Intrinsics.checkExpressionValueIsNotNull(child, "node");
                containsBlock = hasChildBlockElement(child);
            }
            if (containsBlock) {
                return true;
            }
        }
        return false;
    }

    /**
     * Renames {@code node} in place by delegating to {@code Element.tagName(String)}.
     *
     * @param node    element to rename
     * @param tagName new tag name
     */
    protected void setNodeTag(@NotNull Element node, @NotNull String tagName) {
        Intrinsics.checkParameterIsNotNull(node, "node");
        Intrinsics.checkParameterIsNotNull(tagName, "tagName");
        node.tagName(tagName);
    }

    /**
     * Scores the given elements and credits the score to their ancestors.
     *
     * <p>An element is skipped when it has no parent, when its inner text has fewer
     * than 25 characters, or when it has no ancestors. Otherwise its content score is
     * 1, plus the number of pieces obtained by splitting its text on commas, plus one
     * point per 100 characters of text (at most 3). Up to three ancestor levels
     * receive that score divided by 1 (parent), 2 (grandparent) or
     * {@code level * 3} (further up). An ancestor without a ReadabilityObject is
     * initialised via {@code initializeNode} and reported as a candidate.
     *
     * @param elementsToScore elements collected by {@code prepareNodes}
     * @param options         extraction flags, passed on to {@code initializeNode}
     * @return the ancestors that were initialised during this call, in discovery order
     */
    @NotNull
    protected List<Element> scoreElements(@NotNull List<? extends Element> elementsToScore, @NotNull ArticleGrabberOptions options) {
        Intrinsics.checkParameterIsNotNull(elementsToScore, "elementsToScore");
        Intrinsics.checkParameterIsNotNull(options, "options");
        List<Element> candidates = new ArrayList<Element>();

        for (Element element : elementsToScore) {
            if (element.parentNode() == null) {
                continue;
            }

            String innerText = ProcessorBase.getInnerText$default(this, element, this.regEx, false, 4, null);
            if (innerText.length() < 25) {
                continue;
            }

            List<Element> ancestors = getNodeAncestors(element, 3);
            int ancestorCount = ancestors.size();
            if (ancestorCount == 0) {
                continue;
            }

            // Base point + comma-separated pieces + one point per 100 characters (max 3).
            double contentScore = 0.0d + 1
                    + StringsKt.split$default((CharSequence) innerText, new char[]{','}, false, 0, 6, (Object) null).size()
                    + Math.min(Math.floor(innerText.length() / 100.0d), 3.0d);

            for (int level = 0; level < ancestorCount; level++) {
                Element ancestor = ancestors.get(level);
                String tagName = ancestor.tagName();
                if (tagName == null || StringsKt.isBlank(tagName)) {
                    // NOTE(review): in the decompiled control flow the level counter only
                    // advances for ancestors with a tag name, so a blank tag is
                    // reconstructed as "stop scoring this element" — confirm against
                    // the Kotlin source.
                    break;
                }

                if (getReadabilityObject(ancestor) == null) {
                    candidates.add(ancestor);
                    initializeNode(ancestor, options);
                }

                // Parent: no division; grandparent: 2; further up: level * 3.
                int scoreDivider = level == 0 ? 1 : level == 1 ? 2 : level * 3;
                ReadabilityObject readabilityObject = getReadabilityObject(ancestor);
                if (readabilityObject != null) {
                    readabilityObject.setContentScore(readabilityObject.getContentScore() + (contentScore / scoreDivider));
                }
            }
        }
        return candidates;
    }

    /* JADX WARN: Code restructure failed: missing block: B:10:0x01eb, code lost:
    
        r0.setContentScore(r0.getContentScore() + 3);
     */
    /* JADX WARN: Code restructure failed: missing block: B:12:0x00f8, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlBlockQuote.TAG_NAME) != false) goto L65;
     */
    /* JADX WARN: Code restructure failed: missing block: B:14:0x0106, code lost:
    
        if (r0.equals("address") != false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:16:0x0114, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlDefinitionList.TAG_NAME) != false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:18:0x0122, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlHeading1.TAG_NAME) != false) goto L67;
     */
    /* JADX WARN: Code restructure failed: missing block: B:19:0x0207, code lost:
    
        r0.setContentScore(r0.getContentScore() - 5);
     */
    /* JADX WARN: Code restructure failed: missing block: B:21:0x0130, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlHeading2.TAG_NAME) != false) goto L67;
     */
    /* JADX WARN: Code restructure failed: missing block: B:23:0x013e, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlHeading3.TAG_NAME) != false) goto L67;
     */
    /* JADX WARN: Code restructure failed: missing block: B:25:0x014c, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlHeading4.TAG_NAME) != false) goto L67;
     */
    /* JADX WARN: Code restructure failed: missing block: B:27:0x015a, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlHeading5.TAG_NAME) != false) goto L67;
     */
    /* JADX WARN: Code restructure failed: missing block: B:29:0x0168, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlHeading6.TAG_NAME) != false) goto L67;
     */
    /* JADX WARN: Code restructure failed: missing block: B:34:0x0183, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlTableDataCell.TAG_NAME) != false) goto L65;
     */
    /* JADX WARN: Code restructure failed: missing block: B:36:0x0191, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlDefinitionTerm.TAG_NAME) != false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:38:0x019f, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlForm.TAG_NAME) != false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:40:0x01ad, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlTableHeaderCell.TAG_NAME) != false) goto L67;
     */
    /* JADX WARN: Code restructure failed: missing block: B:42:0x01bb, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlUnorderedList.TAG_NAME) != false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:44:0x01c9, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlOrderedList.TAG_NAME) != false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:46:0x01d7, code lost:
    
        if (r0.equals("li") != false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:6:0x00dc, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlDefinitionDescription.TAG_NAME) != false) goto L66;
     */
    /* JADX WARN: Code restructure failed: missing block: B:7:0x01f9, code lost:
    
        r0.setContentScore(r0.getContentScore() - 3);
     */
    /* JADX WARN: Code restructure failed: missing block: B:9:0x00ea, code lost:
    
        if (r0.equals(com.gargoylesoftware.htmlunit.html.HtmlPreformattedText.TAG_NAME) != false) goto L65;
     */
    /**
     * Placeholder left by the decompiler: the real body could not be reconstructed,
     * so every call throws {@link UnsupportedOperationException}.
     *
     * <p>The JADX "code lost" notes that precede this method keep fragments of the
     * missing body: comparisons of a tag name against several HTML tag constants and
     * content-score adjustments of {@code +3}, {@code -3} and {@code -5}.
     * NOTE(review): presumably the original creates the {@code ReadabilityObject} for
     * the element and adjusts its score by tag name; restore it from the original
     * sources before relying on this method.
     *
     * @param r8 the element to initialise (decompiler-generated parameter name)
     * @param r9 the article grabber options (decompiler-generated parameter name)
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always
     */
    @org.jetbrains.annotations.NotNull
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    protected net.dankito.readability4j.model.ReadabilityObject initializeNode(@org.jetbrains.annotations.NotNull org.jsoup.nodes.Element r8, @org.jetbrains.annotations.NotNull net.dankito.readability4j.model.ArticleGrabberOptions r9) {
        /*
            Method dump skipped, instructions count: 548
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: net.dankito.readability4j.processor.ArticleGrabber.initializeNode(org.jsoup.nodes.Element, net.dankito.readability4j.model.ArticleGrabberOptions):net.dankito.readability4j.model.ReadabilityObject");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Computes a weight for an element from its {@code class} and {@code id} attributes.
     *
     * <p>Each attribute that is not blank contributes {@code -25} when it matches the
     * negative pattern and {@code +25} when it matches the positive pattern; both may
     * apply to the same attribute. When class weighting is disabled in
     * {@code options} the result is always {@code 0}.
     *
     * @param e       element to inspect
     * @param options options providing the {@code weightClasses} switch
     * @return the summed weight, between {@code -50} and {@code +50}
     */
    public int getClassWeight(@NotNull Element e, @NotNull ArticleGrabberOptions options) {
        Intrinsics.checkParameterIsNotNull(e, "e");
        Intrinsics.checkParameterIsNotNull(options, "options");
        if (!options.getWeightClasses()) {
            return 0;
        }

        int weight = 0;

        if (!StringsKt.isBlank(e.className())) {
            String classNames = e.className();
            Intrinsics.checkExpressionValueIsNotNull(classNames, "e.className()");
            if (this.regEx.isNegative(classNames)) {
                weight -= 25;
            }
            if (this.regEx.isPositive(classNames)) {
                weight += 25;
            }
        }

        if (!StringsKt.isBlank(e.id())) {
            String elementId = e.id();
            Intrinsics.checkExpressionValueIsNotNull(elementId, "e.id()");
            if (this.regEx.isNegative(elementId)) {
                weight -= 25;
            }
            if (this.regEx.isPositive(elementId)) {
                weight += 25;
            }
        }

        return weight;
    }

    /**
     * Collects the ancestors of {@code node}, nearest first.
     *
     * @param node element whose parent chain is walked
     * @param i    number of ancestors after which the walk stops; a value the counter
     *             never equals (such as {@code 0}) collects the whole parent chain
     * @return the collected ancestors, starting with the direct parent
     */
    @NotNull
    protected List<Element> getNodeAncestors(@NotNull Element node, int i) {
        Intrinsics.checkParameterIsNotNull(node, "node");
        List<Element> ancestors = new ArrayList<>();
        Element parent = node.parent();
        while (parent != null) {
            ancestors.add(parent);
            // The list size doubles as the depth counter.
            if (ancestors.size() == i) {
                break;
            }
            Intrinsics.checkExpressionValueIsNotNull(parent, "next.parent()");
            parent = parent.parent();
        }
        return ancestors;
    }

    /**
     * Compiler-generated bridge that supplies the default depth for
     * {@link #getNodeAncestors(Element, int)}.
     *
     * @param i   explicit depth, used only when bit {@code 2} of {@code i2} is clear
     * @param i2  bit mask of omitted arguments; bit {@code 2} set means the depth was
     *            omitted and {@code 0} is used instead
     * @param obj must be {@code null}; any other value is rejected
     * @throws UnsupportedOperationException if {@code obj} is not {@code null}
     */
    @NotNull
    public static /* bridge */ /* synthetic */ List getNodeAncestors$default(ArticleGrabber articleGrabber, Element element, int i, int i2, Object obj) {
        if (obj != null) {
            throw new UnsupportedOperationException("Super calls with default arguments not supported in this target, function: getNodeAncestors");
        }
        int maxDepth = (i2 & 2) == 0 ? i : 0;
        return articleGrabber.getNodeAncestors(element, maxDepth);
    }

    /* JADX WARN: Code restructure failed: missing block: B:23:0x00d4, code lost:
    
        r0.add(r20, r0);
     */
    /* JADX WARN: Code restructure failed: missing block: B:24:0x00e6, code lost:
    
        if (r0.size() <= r7.nbTopCandidates) goto L142;
     */
    /* JADX WARN: Code restructure failed: missing block: B:26:0x00e9, code lost:
    
        r0.remove(r7.nbTopCandidates);
     */
    /**
     * Placeholder left by the decompiler: the real body could not be reconstructed,
     * so every call throws {@link UnsupportedOperationException}.
     *
     * <p>The JADX "code lost" notes that precede this method keep fragments of the
     * missing body: an element is inserted into a list at an index, and the list is
     * trimmed when its size exceeds {@code nbTopCandidates}.
     * NOTE(review): presumably this ranks the scored candidates and returns the best
     * one with a flag; restore it from the original sources before relying on it.
     *
     * @param r8  an element (decompiler-generated parameter name)
     * @param r9  a list of elements (decompiler-generated parameter name)
     * @param r10 the article grabber options (decompiler-generated parameter name)
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always
     */
    @org.jetbrains.annotations.NotNull
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    protected kotlin.Pair<org.jsoup.nodes.Element, java.lang.Boolean> getTopCandidate(@org.jetbrains.annotations.NotNull org.jsoup.nodes.Element r8, @org.jetbrains.annotations.NotNull java.util.List<? extends org.jsoup.nodes.Element> r9, @org.jetbrains.annotations.NotNull net.dankito.readability4j.model.ArticleGrabberOptions r10) {
        /*
            Method dump skipped, instructions count: 1030
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: net.dankito.readability4j.processor.ArticleGrabber.getTopCandidate(org.jsoup.nodes.Element, java.util.List, net.dankito.readability4j.model.ArticleGrabberOptions):kotlin.Pair");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the share of an element's text that sits inside {@code <a>} elements.
     *
     * @param element element to measure
     * @return length of the inner text of all {@code a} elements found by
     *         {@code getElementsByTag("a")}, divided by the length of the element's
     *         own inner text; {@code 0.0} when the element has no text
     */
    public double getLinkDensity(@NotNull Element element) {
        Intrinsics.checkParameterIsNotNull(element, "element");
        int textLength = ProcessorBase.getInnerText$default(this, element, this.regEx, false, 4, null).length();
        if (textLength == 0) {
            return 0.0d;
        }
        int linkLength = 0;
        for (Element linkNode : element.getElementsByTag("a")) {
            Intrinsics.checkExpressionValueIsNotNull(linkNode, "linkNode");
            linkLength += ProcessorBase.getInnerText$default(this, linkNode, this.regEx, false, 4, null).length();
        }
        // Divide as double: int / int would truncate every partial density to 0.
        return (double) linkLength / textLength;
    }

    /* JADX WARN: Code restructure failed: missing block: B:18:0x00d4, code lost:
    
        if (r2 != null) goto L22;
     */
    /**
     * Placeholder left by the decompiler: the real body could not be reconstructed,
     * so every call throws {@link UnsupportedOperationException}.
     *
     * <p>NOTE(review): judging by the signature this presumably builds the article
     * element from the chosen top candidate; that cannot be confirmed from this file.
     * Restore the body from the original sources before relying on this method.
     *
     * @param r8  the document (decompiler-generated parameter name)
     * @param r9  an element (decompiler-generated parameter name)
     * @param r10 a boolean flag whose meaning is not visible here
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always
     */
    @org.jetbrains.annotations.NotNull
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    protected org.jsoup.nodes.Element createArticleContent(@org.jetbrains.annotations.NotNull org.jsoup.nodes.Document r8, @org.jetbrains.annotations.NotNull org.jsoup.nodes.Element r9, boolean r10) {
        /*
            Method dump skipped, instructions count: 516
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: net.dankito.readability4j.processor.ArticleGrabber.createArticleContent(org.jsoup.nodes.Document, org.jsoup.nodes.Element, boolean):org.jsoup.nodes.Element");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Tells whether a sibling should be kept, which is the case only for paragraphs.
     *
     * @param sibling sibling element to test
     * @return {@code true} when the sibling's tag name equals the paragraph tag name
     */
    public boolean shouldKeepSibling(@NotNull Element sibling) {
        Intrinsics.checkParameterIsNotNull(sibling, "sibling");
        String siblingTag = sibling.tagName();
        return HtmlParagraph.TAG_NAME.equals(siblingTag);
    }

    /**
     * Cleans up the extracted article content in place.
     *
     * <p>Steps, in order: strip presentational attributes, mark data tables,
     * conditionally clean forms and fieldsets, remove several element kinds outright,
     * remove nodes whose class/id contains "share", drop a lone {@code h2} that
     * duplicates the article title, remove further form controls and iframes, clean
     * headers and conditionally clean tables, lists and divs, remove empty paragraphs,
     * and finally remove {@code br} elements that are directly followed by a paragraph.
     *
     * @param articleContent root of the article content; modified in place
     * @param options        options forwarded to the conditional cleaning steps
     * @param metadata       article metadata; its title is compared against a lone {@code h2}
     */
    protected void prepArticle(@NotNull Element articleContent, @NotNull ArticleGrabberOptions options, @NotNull ArticleMetadata metadata) {
        Intrinsics.checkParameterIsNotNull(articleContent, "articleContent");
        Intrinsics.checkParameterIsNotNull(options, "options");
        Intrinsics.checkParameterIsNotNull(metadata, "metadata");
        cleanStyles(articleContent);
        markDataTables(articleContent);
        cleanConditionally(articleContent, HtmlForm.TAG_NAME, options);
        cleanConditionally(articleContent, HtmlFieldSet.TAG_NAME, options);
        clean(articleContent, HtmlObject.TAG_NAME);
        clean(articleContent, HtmlEmbed.TAG_NAME);
        clean(articleContent, HtmlHeading1.TAG_NAME);
        clean(articleContent, HtmlFooter.TAG_NAME);
        clean(articleContent, HtmlLink.TAG_NAME);

        // Remove nodes whose class or id mentions "share" below each direct child.
        Regex shareRegex = new Regex("share");
        for (Element topCandidate : articleContent.children()) {
            Intrinsics.checkExpressionValueIsNotNull(topCandidate, "topCandidate");
            cleanMatchedNodes(topCandidate, shareRegex);
        }

        // A single h2 whose text is close to the title is treated as a duplicate of it.
        Elements h2 = articleContent.getElementsByTag(HtmlHeading2.TAG_NAME);
        if (h2.size() == 1) {
            String title = metadata.getTitle();
            if (title != null && title.length() > 0) {
                String h2Text = h2.get(0).text();
                Intrinsics.checkExpressionValueIsNotNull(h2Text, "h2[0].text()");
                // Divide as float: int / int truncates the rate to 0 for every near match,
                // which made the "h2 longer than title" branch unreachable.
                float lengthSimilarRate = (h2Text.length() - title.length()) / (float) title.length();
                if (Math.abs(lengthSimilarRate) < 0.5d) {
                    boolean titlesMatch;
                    if (lengthSimilarRate > 0) {
                        titlesMatch = StringsKt.contains$default((CharSequence) h2Text, (CharSequence) title, false, 2, (Object) null);
                    } else {
                        titlesMatch = StringsKt.contains$default((CharSequence) title, (CharSequence) h2Text, false, 2, (Object) null);
                    }
                    if (titlesMatch) {
                        clean(articleContent, HtmlHeading2.TAG_NAME);
                    }
                }
            }
        }

        clean(articleContent, HtmlInlineFrame.TAG_NAME);
        clean(articleContent, "input");
        clean(articleContent, HtmlTextArea.TAG_NAME);
        clean(articleContent, "select");
        clean(articleContent, HtmlButton.TAG_NAME);
        cleanHeaders(articleContent, options);
        cleanConditionally(articleContent, HtmlTable.TAG_NAME, options);
        cleanConditionally(articleContent, HtmlUnorderedList.TAG_NAME, options);
        cleanConditionally(articleContent, HtmlDivision.TAG_NAME, options);

        // Remove paragraphs that contain neither media elements nor any text.
        removeNodes(articleContent, HtmlParagraph.TAG_NAME, new Function1<Element, Boolean>() {
            @Override
            public Boolean invoke(Element paragraph) {
                Intrinsics.checkParameterIsNotNull(paragraph, "paragraph");
                int mediaCount = paragraph.getElementsByTag(HtmlImage.TAG_NAME).size()
                        + paragraph.getElementsByTag(HtmlEmbed.TAG_NAME).size()
                        + paragraph.getElementsByTag(HtmlObject.TAG_NAME).size()
                        + paragraph.getElementsByTag(HtmlInlineFrame.TAG_NAME).size();
                return mediaCount == 0
                        && ProcessorBase.getInnerText$default(ArticleGrabber.this, paragraph, null, false, 2, null).length() == 0;
            }
        });

        // A <br> directly followed by a paragraph is redundant.
        for (Element lineBreak : articleContent.select("br")) {
            Element following = nextElement(lineBreak.nextSibling(), this.regEx);
            if (following != null && Intrinsics.areEqual(following.tagName(), HtmlParagraph.TAG_NAME)) {
                lineBreak.remove();
            }
        }
    }

    /**
     * Recursively removes presentational attributes from {@code e} and its descendants.
     *
     * <p>{@code svg} elements are left untouched together with their subtree.
     * Elements whose class is exactly {@code readability-styled} keep their own
     * attributes, but their children are still processed.
     *
     * @param e root of the subtree to clean; modified in place
     */
    protected void cleanStyles(@NotNull Element e) {
        Intrinsics.checkParameterIsNotNull(e, "e");
        if (Intrinsics.areEqual(e.tagName(), HtmlSvg.TAG_NAME)) {
            return;
        }
        // Compare by value: a reference comparison against the literal is practically
        // always unequal, so the exemption for "readability-styled" never applied.
        if (!"readability-styled".equals(e.className())) {
            for (String attributeName : Companion.getPRESENTATIONAL_ATTRIBUTES()) {
                e.removeAttr(attributeName);
            }
            if (Companion.getDEPRECATED_SIZE_ATTRIBUTE_ELEMS().contains(e.tagName())) {
                e.removeAttr(ThumbnailType.WIDTH);
                e.removeAttr(ThumbnailType.HEIGHT);
            }
        }
        for (Element child : e.children()) {
            Intrinsics.checkExpressionValueIsNotNull(child, "child");
            cleanStyles(child);
        }
    }

    /**
     * Classifies every {@code table} under {@code root} as a data table or not and
     * records the verdict through {@link #setReadabilityDataTable}.
     *
     * <p>Rules, first match wins:
     * <ol>
     *   <li>{@code role="presentation"}: not a data table</li>
     *   <li>{@code datatable="0"}: not a data table</li>
     *   <li>non-blank {@code summary} attribute: data table</li>
     *   <li>a first {@code caption} element with child nodes: data table</li>
     *   <li>any descendant listed in {@code DATA_TABLE_DESCENDANTS}: data table</li>
     *   <li>a nested table: not a data table</li>
     *   <li>at least 10 rows or more than 4 columns: data table</li>
     *   <li>otherwise a data table only when rows * columns exceeds 10</li>
     * </ol>
     *
     * @param root element whose tables are classified
     */
    protected void markDataTables(@NotNull Element root) {
        Intrinsics.checkParameterIsNotNull(root, "root");
        for (Element table : root.getElementsByTag(HtmlTable.TAG_NAME)) {
            Intrinsics.checkExpressionValueIsNotNull(table, "table");

            if (Intrinsics.areEqual(table.attr(PackageDocumentBase.OPFAttributes.role), "presentation")) {
                setReadabilityDataTable(table, false);
                continue;
            }
            if (Intrinsics.areEqual(table.attr("datatable"), "0")) {
                setReadabilityDataTable(table, false);
                continue;
            }
            if (!StringsKt.isBlank(table.attr(HtmlSummary.TAG_NAME))) {
                setReadabilityDataTable(table, true);
                continue;
            }

            Elements captions = table.getElementsByTag(HtmlCaption.TAG_NAME);
            if (captions.size() > 0 && captions.get(0).childNodeSize() > 0) {
                setReadabilityDataTable(table, true);
                continue;
            }

            // Bounded search: stops at the first data-like descendant or when the list ends.
            boolean hasDataDescendant = false;
            for (String descendantTag : Companion.getDATA_TABLE_DESCENDANTS()) {
                if (table.getElementsByTag(descendantTag).size() > 0) {
                    hasDataDescendant = true;
                    break;
                }
            }
            if (hasDataDescendant) {
                Companion.getLog().debug("Data table because found data-y descendant");
                setReadabilityDataTable(table, true);
                continue;
            }

            // getElementsByTag also matches the element it is called on, so the table
            // itself is always in the result; a nested table means more than one hit.
            if (table.getElementsByTag(HtmlTable.TAG_NAME).size() > 1) {
                setReadabilityDataTable(table, false);
                continue;
            }

            Pair<Integer, Integer> rowAndColumnCount = getRowAndColumnCount(table);
            int rows = rowAndColumnCount.getFirst().intValue();
            int columns = rowAndColumnCount.getSecond().intValue();
            if (rows >= 10 || columns > 4) {
                setReadabilityDataTable(table, true);
                continue;
            }
            // Small tables are judged purely by their cell count.
            setReadabilityDataTable(table, rows * columns > 10);
        }
    }

    /**
     * Counts the rows and the widest row of a table, honouring {@code rowspan} and
     * {@code colspan}.
     *
     * <p>Each row element adds its {@code rowspan} to the row count, and each data
     * cell adds its {@code colspan} to that row's column count. A span attribute
     * that cannot be parsed as an integer counts as {@code 1}.
     *
     * @param table table element to measure
     * @return pair of (row count, maximum column count over all rows)
     */
    @NotNull
    protected Pair<Integer, Integer> getRowAndColumnCount(@NotNull Element table) {
        Intrinsics.checkParameterIsNotNull(table, "table");
        int rowCount = 0;
        int maxColumnCount = 0;
        for (Element row : table.getElementsByTag(HtmlTableRow.TAG_NAME)) {
            int rowSpan;
            try {
                rowSpan = Integer.parseInt(row.attr("rowspan"));
            } catch (Exception ignored) {
                // Missing or malformed rowspan: the row counts once.
                rowSpan = 1;
            }
            rowCount += rowSpan;

            int columnsInRow = 0;
            for (Element cell : row.getElementsByTag(HtmlTableDataCell.TAG_NAME)) {
                int colSpan;
                try {
                    colSpan = Integer.parseInt(cell.attr("colspan"));
                } catch (Exception ignored) {
                    // Missing or malformed colspan: the cell counts once.
                    colSpan = 1;
                }
                columnsInRow += colSpan;
            }
            maxColumnCount = Math.max(maxColumnCount, columnsInRow);
        }
        return new Pair<>(Integer.valueOf(rowCount), Integer.valueOf(maxColumnCount));
    }

    /**
     * Removes elements with the given tag under {@code e} when the
     * {@code ArticleGrabber$cleanConditionally$1} predicate selects them.
     * Does nothing when conditional cleaning is disabled in {@code options}.
     *
     * @param e       root element to clean
     * @param tag     tag name of the elements to examine
     * @param options options providing the {@code cleanConditionally} switch; also
     *                passed on to the predicate
     */
    protected void cleanConditionally(@NotNull Element e, @NotNull String tag, @NotNull ArticleGrabberOptions options) {
        Intrinsics.checkParameterIsNotNull(e, "e");
        Intrinsics.checkParameterIsNotNull(tag, "tag");
        Intrinsics.checkParameterIsNotNull(options, "options");
        if (!options.getCleanConditionally()) {
            return;
        }
        // The predicate is told whether the tag being cleaned is a list.
        boolean isList = Intrinsics.areEqual(tag, HtmlUnorderedList.TAG_NAME)
                || Intrinsics.areEqual(tag, HtmlOrderedList.TAG_NAME);
        removeNodes(e, tag, new ArticleGrabber$cleanConditionally$1(this, options, isList));
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Checks whether {@code node} has an ancestor with the given tag name.
     *
     * @param node      element whose parent chain is searched
     * @param tagName   tag name to look for; compared in lower case
     * @param i         depth limit; when positive the search gives up once more than
     *                  {@code i} levels above the direct parent have been examined,
     *                  otherwise the whole parent chain is searched
     * @param function1 optional extra condition a matching ancestor must satisfy;
     *                  {@code null} accepts every ancestor with a matching tag
     * @return {@code true} if a matching (and accepted) ancestor was found
     */
    public boolean hasAncestorTag(@NotNull Element node, @NotNull String tagName, int i, @Nullable Function1<? super Element, Boolean> function1) {
        Intrinsics.checkParameterIsNotNull(node, "node");
        Intrinsics.checkParameterIsNotNull(tagName, "tagName");
        // Locale.ROOT keeps the comparison stable under locales with special casing
        // rules (e.g. Turkish, where "I" lower-cases to a dotless i).
        String wantedTag = tagName.toLowerCase(java.util.Locale.ROOT);
        Intrinsics.checkExpressionValueIsNotNull(wantedTag, "(this as java.lang.String).toLowerCase()");
        int depth = 0;
        for (Element ancestor = node.parent(); ancestor != null; ancestor = ancestor.parent()) {
            if (i > 0 && depth > i) {
                return false;
            }
            if (Intrinsics.areEqual(ancestor.tagName(), wantedTag)
                    && (function1 == null || function1.invoke(ancestor).booleanValue())) {
                return true;
            }
            depth++;
        }
        return false;
    }

    /**
     * Compiler-generated bridge that fills in the default arguments of
     * {@code hasAncestorTag}.
     *
     * @param i         explicit depth limit, used when bit {@code 4} of {@code i2} is clear
     * @param function1 explicit filter, used when bit {@code 8} of {@code i2} is clear
     * @param i2        bit mask of omitted arguments: bit {@code 4} selects the default
     *                  depth {@code 3}, bit {@code 8} selects a {@code null} filter
     * @param obj       must be {@code null}; any other value is rejected
     * @throws UnsupportedOperationException if {@code obj} is not {@code null}
     */
    public static /* bridge */ /* synthetic */ boolean hasAncestorTag$default(ArticleGrabber articleGrabber, Element element, String str, int i, Function1 function1, int i2, Object obj) {
        if (obj != null) {
            throw new UnsupportedOperationException("Super calls with default arguments not supported in this target, function: hasAncestorTag");
        }
        int maxDepth = (i2 & 4) == 0 ? i : 3;
        Function1 filter = (i2 & 8) == 0 ? function1 : (Function1) null;
        return articleGrabber.hasAncestorTag(element, str, maxDepth, filter);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Counts how often a character occurs in the inner text of an element.
     *
     * @param node element whose inner text is examined
     * @param c    character to count
     * @return number of pieces obtained by splitting the text at {@code c}, minus one
     */
    public int getCharCount(@NotNull Element node, char c) {
        Intrinsics.checkParameterIsNotNull(node, "node");
        CharSequence innerText = (CharSequence) ProcessorBase.getInnerText$default(this, node, this.regEx, false, 4, null);
        char[] delimiters = new char[]{c};
        int pieceCount = StringsKt.split$default(innerText, delimiters, false, 0, 6, (Object) null).size();
        return pieceCount - 1;
    }

    /**
     * Compiler-generated bridge that supplies the default character for
     * {@code getCharCount}.
     *
     * @param c   explicit character, used when bit {@code 2} of {@code i} is clear
     * @param i   bit mask of omitted arguments; bit {@code 2} set selects {@code ','}
     * @param obj must be {@code null}; any other value is rejected
     * @throws UnsupportedOperationException if {@code obj} is not {@code null}
     */
    public static /* bridge */ /* synthetic */ int getCharCount$default(ArticleGrabber articleGrabber, Element element, char c, int i, Object obj) {
        if (obj != null) {
            throw new UnsupportedOperationException("Super calls with default arguments not supported in this target, function: getCharCount");
        }
        char separator = (i & 2) == 0 ? c : ',';
        return articleGrabber.getCharCount(element, separator);
    }

    /**
     * Removes all elements with the given tag under {@code e}.
     *
     * <p>For tags listed in {@code EMBEDDED_NODES} an element is kept when the
     * video pattern matches either its attribute values (joined with {@code |}) or
     * its inner HTML; every other element with the tag is removed.
     *
     * @param e   root element to clean
     * @param tag tag name of the elements to remove
     */
    protected void clean(@NotNull Element e, @NotNull String tag) {
        Intrinsics.checkParameterIsNotNull(e, "e");
        Intrinsics.checkParameterIsNotNull(tag, "tag");
        final boolean isEmbed = Companion.getEMBEDDED_NODES().contains(tag);
        // Plain anonymous Function1: the decompiled 'super(1)' initializer is not legal
        // Java for an interface implementation and has been dropped.
        removeNodes(e, tag, new Function1<Element, Boolean>() {
            @Override
            public Boolean invoke(Element element) {
                Intrinsics.checkParameterIsNotNull(element, "element");
                if (!isEmbed) {
                    return true;
                }
                List<String> attributeValues = new ArrayList<>();
                for (Attribute attribute : element.attributes()) {
                    attributeValues.add(attribute.getValue());
                }
                // Keep embeds whose attributes point at a video.
                if (ArticleGrabber.this.getRegEx().isVideo(CollectionsKt.joinToString$default(attributeValues, "|", null, null, 0, null, null, 62, null))) {
                    return false;
                }
                // Keep embeds whose inner HTML points at a video.
                String html = element.html();
                Intrinsics.checkExpressionValueIsNotNull(html, "element.html()");
                return !ArticleGrabber.this.getRegEx().isVideo(html);
            }
        });
    }

    /**
     * Walks the subtree of {@code e} and removes every node whose class name and id,
     * joined by a space, contain a match of {@code regex}.
     *
     * <p>The walk ends when there is no next node or when it reaches the node that
     * follows {@code e} with its children skipped.
     *
     * @param e     root of the subtree to scan
     * @param regex pattern tested against {@code "<className> <id>"}
     */
    protected void cleanMatchedNodes(@NotNull Element e, @NotNull Regex regex) {
        Intrinsics.checkParameterIsNotNull(e, "e");
        Intrinsics.checkParameterIsNotNull(regex, "regex");
        Element endOfSearchMarker = getNextNode(e, true);
        Element current = getNextNode$default(this, e, false, 2, null);
        while (current != null && !Intrinsics.areEqual(current, endOfSearchMarker)) {
            String classAndId = new StringBuilder().append(current.className()).append(" ").append(current.id()).toString();
            if (regex.containsMatchIn(classAndId)) {
                current = removeAndGetNext(current, regex.getPattern());
            } else {
                current = getNextNode$default(this, current, false, 2, null);
            }
        }
    }

    /**
     * Removes the first- and second-level headings under {@code e} whose class weight
     * is negative.
     *
     * @param e       root element to clean
     * @param options options passed to {@link #getClassWeight}
     */
    protected void cleanHeaders(@NotNull final Element e, @NotNull final ArticleGrabberOptions options) {
        Intrinsics.checkParameterIsNotNull(e, "e");
        Intrinsics.checkParameterIsNotNull(options, "options");
        for (String headerTag : Arrays.asList(HtmlHeading1.TAG_NAME, HtmlHeading2.TAG_NAME)) {
            Intrinsics.checkExpressionValueIsNotNull(headerTag, "it");
            // Plain anonymous Function1: the decompiled 'super(1)' initializer is not
            // legal Java for an interface implementation and has been dropped.
            removeNodes(e, headerTag, new Function1<Element, Boolean>() {
                @Override
                public Boolean invoke(Element header) {
                    Intrinsics.checkParameterIsNotNull(header, "header");
                    return ArticleGrabber.this.getClassWeight(header, options) < 0;
                }
            });
        }
    }

    /**
     * Removes {@code node} and returns the node that followed it.
     *
     * @param node   element to remove
     * @param reason text passed to {@code printAndRemove} together with the node
     * @return the next node with the children of {@code node} skipped, or
     *         {@code null} if there is none
     */
    @Nullable
    protected Element removeAndGetNext(@NotNull Element node, @NotNull String reason) {
        Intrinsics.checkParameterIsNotNull(node, "node");
        Intrinsics.checkParameterIsNotNull(reason, "reason");
        // Determine the successor first: it is looked up relative to the node being removed.
        final Element successor = getNextNode(node, true);
        printAndRemove(node, reason);
        return successor;
    }

    /**
     * Compiler-generated bridge that supplies the default reason for
     * {@code removeAndGetNext}.
     *
     * @param str explicit reason, used when bit {@code 2} of {@code i} is clear
     * @param i   bit mask of omitted arguments; bit {@code 2} set selects an empty reason
     * @param obj must be {@code null}; any other value is rejected
     * @throws UnsupportedOperationException if {@code obj} is not {@code null}
     */
    @Nullable
    public static /* bridge */ /* synthetic */ Element removeAndGetNext$default(ArticleGrabber articleGrabber, Element element, String str, int i, Object obj) {
        if (obj != null) {
            throw new UnsupportedOperationException("Super calls with default arguments not supported in this target, function: removeAndGetNext");
        }
        String reason = (i & 2) == 0 ? str : "";
        return articleGrabber.removeAndGetNext(element, reason);
    }

    /**
     * Returns the element that follows {@code node} in a depth-first traversal.
     *
     * @param node current element
     * @param z    when {@code true} the children of {@code node} are skipped
     * @return the first child (unless skipped), else the next element sibling, else
     *         the next element sibling of the closest ancestor that has one, else
     *         {@code null}
     */
    @Nullable
    protected Element getNextNode(@NotNull Element node, boolean z) {
        Intrinsics.checkParameterIsNotNull(node, "node");
        // Descend first, unless the caller asked to skip the children.
        if (!z && node.children().size() > 0) {
            return node.child(0);
        }
        Element sibling = node.nextElementSibling();
        if (sibling != null) {
            return sibling;
        }
        // Climb until some ancestor has a following sibling.
        for (Element ancestor = node.parent(); ancestor != null; ancestor = ancestor.parent()) {
            Element ancestorSibling = ancestor.nextElementSibling();
            if (ancestorSibling != null) {
                return ancestorSibling;
            }
        }
        return null;
    }

    /**
     * Compiler-generated bridge that supplies the default flag for
     * {@code getNextNode}.
     *
     * @param z   explicit flag, used when bit {@code 2} of {@code i} is clear
     * @param i   bit mask of omitted arguments; bit {@code 2} set selects {@code false}
     * @param obj must be {@code null}; any other value is rejected
     * @throws UnsupportedOperationException if {@code obj} is not {@code null}
     */
    @Nullable
    public static /* bridge */ /* synthetic */ Element getNextNode$default(ArticleGrabber articleGrabber, Element element, boolean z, int i, Object obj) {
        if (obj != null) {
            throw new UnsupportedOperationException("Super calls with default arguments not supported in this target, function: getNextNode");
        }
        boolean ignoreSelfAndKids = (i & 2) == 0 && z;
        return articleGrabber.getNextNode(element, ignoreSelfAndKids);
    }

    /**
     * Looks for a text direction attribute around the top candidate and stores the
     * first non-blank value in {@code articleDir}.
     *
     * <p>The elements are checked in this order: the candidate's parent, the
     * candidate itself, the ancestors of the parent, the document body and the
     * {@code html} element. Nothing is stored when none of them carries a value.
     *
     * @param topCandidate element the search starts from; its parent must not be null
     * @param doc          document supplying the body and {@code html} elements
     */
    protected void getTextDirection(@NotNull Element topCandidate, @NotNull Document doc) {
        Intrinsics.checkParameterIsNotNull(topCandidate, "topCandidate");
        Intrinsics.checkParameterIsNotNull(doc, "doc");
        Element candidateParent = topCandidate.parent();
        Set<Element> toInspect = CollectionsKt.toMutableSet(Arrays.asList(candidateParent, topCandidate));
        Intrinsics.checkExpressionValueIsNotNull(candidateParent, "topCandidate.parent()");
        toInspect.addAll(getNodeAncestors$default(this, candidateParent, 0, 2, null));
        toInspect.add(doc.body());
        toInspect.add(doc.selectFirst("html"));
        for (Element inspected : toInspect) {
            String direction = inspected.attr(HtmlDirectory.TAG_NAME);
            if (!StringsKt.isBlank(direction)) {
                this.articleDir = direction;
                return;
            }
        }
    }

    /**
     * Looks up the {@code ReadabilityObject} stored for an element.
     *
     * @param element element used as the key into {@code readabilityObjects}
     * @return the stored object, or {@code null} when the map has no entry for the element
     */
    @Nullable
    protected ReadabilityObject getReadabilityObject(@NotNull Element element) {
        Intrinsics.checkParameterIsNotNull(element, "element");
        return this.readabilityObjects.get(element);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Reports whether a table has been recorded as a data table.
     *
     * @param table table element to look up
     * @return the recorded flag, or {@code false} when nothing was recorded for the table
     */
    public boolean getReadabilityDataTable(@NotNull Element table) {
        Intrinsics.checkParameterIsNotNull(table, "table");
        Boolean recorded = this.readabilityDataTable.get(table);
        return recorded != null && recorded.booleanValue();
    }

    /**
     * Records whether a table counts as a data table, replacing any earlier entry
     * for the same key in {@code readabilityDataTable}.
     *
     * @param table table element used as the map key
     * @param z     {@code true} to record the table as a data table
     */
    protected void setReadabilityDataTable(@NotNull Element table, boolean z) {
        Intrinsics.checkParameterIsNotNull(table, "table");
        this.readabilityDataTable.put(table, Boolean.valueOf(z));
    }

    /**
     * Returns the options this grabber was constructed with.
     *
     * @return the {@code options} field
     */
    @NotNull
    protected final ReadabilityOptions getOptions() {
        return this.options;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the regular-expression helper this grabber was constructed with.
     *
     * @return the {@code regEx} field
     */
    @NotNull
    public final RegExUtil getRegEx() {
        return this.regEx;
    }

    /**
     * Creates a grabber with the given options and regular-expression helper.
     *
     * <p>The number of top candidates and the word threshold are copied from
     * {@code options}; the per-element bookkeeping maps start out empty.
     *
     * @param options readability options; must not be null
     * @param regEx   regular-expression helper; must not be null
     */
    public ArticleGrabber(@NotNull ReadabilityOptions options, @NotNull RegExUtil regEx) {
        Intrinsics.checkParameterIsNotNull(options, "options");
        Intrinsics.checkParameterIsNotNull(regEx, "regEx");
        this.readabilityObjects = new HashMap<>();
        this.readabilityDataTable = new HashMap<>();
        this.regEx = regEx;
        this.options = options;
        this.nbTopCandidates = options.getNbTopCandidates();
        this.wordThreshold = options.getWordThreshold();
    }

    /**
     * Compiler-generated constructor that supplies the default {@code RegExUtil}.
     *
     * @param readabilityOptions       options forwarded to the primary constructor
     * @param regExUtil                explicit helper, used when bit {@code 2} of {@code i} is clear
     * @param i                        bit mask of omitted arguments; bit {@code 2} set means a new
     *                                 {@code RegExUtil} is created instead of using {@code regExUtil}
     * @param defaultConstructorMarker marker parameter; not read by this constructor
     */
    public /* synthetic */ ArticleGrabber(ReadabilityOptions readabilityOptions, RegExUtil regExUtil, int i, DefaultConstructorMarker defaultConstructorMarker) {
        // The fallback RegExUtil is built with 13 null arguments and the mask 8191 (all 13 low
        // bits set); presumably every one of its parameters takes its default — confirm against RegExUtil.
        this(readabilityOptions, (i & 2) != 0 ? new RegExUtil(null, null, null, null, null, null, null, null, null, null, null, null, null, 8191, null) : regExUtil);
    }
}
