/*
 * Decompiled with CFR 0.152.
 */
package com.googlecode.clearnlp.constituent;

import com.googlecode.clearnlp.constituent.CTLib;
import com.googlecode.clearnlp.constituent.CTNode;
import com.googlecode.clearnlp.constituent.CTTree;
import com.googlecode.clearnlp.morphology.MPLibEn;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

public class CTLibEn
extends CTLib {
    public static final String PTAG_S = "S";
    public static final String PTAG_SBAR = "SBAR";
    public static final String PTAG_SBARQ = "SBARQ";
    public static final String PTAG_SINV = "SINV";
    public static final String PTAG_SQ = "SQ";
    public static final String PTAG_ADJP = "ADJP";
    public static final String PTAG_ADVP = "ADVP";
    public static final String PTAG_CAPTION = "CAPTION";
    public static final String PTAG_CIT = "CIT";
    public static final String PTAG_CONJP = "CONJP";
    public static final String PTAG_EDITED = "EDITED";
    public static final String PTAG_EMBED = "EMBED";
    public static final String PTAG_FRAG = "FRAG";
    public static final String PTAG_HEADING = "HEADING";
    public static final String PTAG_INTJ = "INTJ";
    public static final String PTAG_LST = "LST";
    public static final String PTAG_META = "META";
    public static final String PTAG_NAC = "NAC";
    public static final String PTAG_NML = "NML";
    public static final String PTAG_NP = "NP";
    public static final String PTAG_NX = "NX";
    public static final String PTAG_PP = "PP";
    public static final String PTAG_PRN = "PRN";
    public static final String PTAG_PRT = "PRT";
    public static final String PTAG_QP = "QP";
    public static final String PTAG_RRC = "RRC";
    public static final String PTAG_TITLE = "TITLE";
    public static final String PTAG_TYPO = "TYPO";
    public static final String PTAG_UCP = "UCP";
    public static final String PTAG_VP = "VP";
    public static final String PTAG_WHADJP = "WHADJP";
    public static final String PTAG_WHADVP = "WHADVP";
    public static final String PTAG_WHNP = "WHNP";
    public static final String PTAG_WHPP = "WHPP";
    public static final String POS_ADD = "ADD";
    public static final String POS_AFX = "AFX";
    public static final String POS_CC = "CC";
    public static final String POS_CD = "CD";
    public static final String POS_CODE = "CODE";
    public static final String POS_DT = "DT";
    public static final String POS_EX = "EX";
    public static final String POS_FW = "FW";
    public static final String POS_IN = "IN";
    public static final String POS_JJ = "JJ";
    public static final String POS_JJR = "JJR";
    public static final String POS_JJS = "JJS";
    public static final String POS_LS = "LS";
    public static final String POS_MD = "MD";
    public static final String POS_NN = "NN";
    public static final String POS_NNS = "NNS";
    public static final String POS_NNP = "NNP";
    public static final String POS_NNPS = "NNPS";
    public static final String POS_PDT = "PDT";
    public static final String POS_POS = "POS";
    public static final String POS_PRP = "PRP";
    public static final String POS_PRPS = "PRP$";
    public static final String POS_RB = "RB";
    public static final String POS_RBR = "RBR";
    public static final String POS_RBS = "RBS";
    public static final String POS_RP = "RP";
    public static final String POS_TO = "TO";
    public static final String POS_UH = "UH";
    public static final String POS_VB = "VB";
    public static final String POS_VBD = "VBD";
    public static final String POS_VBG = "VBG";
    public static final String POS_VBN = "VBN";
    public static final String POS_VBP = "VBP";
    public static final String POS_VBZ = "VBZ";
    public static final String POS_WDT = "WDT";
    public static final String POS_WP = "WP";
    public static final String POS_WPS = "WP$";
    public static final String POS_WRB = "WRB";
    public static final String POS_DOLLAR = "$";
    public static final String POS_COLON = ":";
    public static final String POS_COMMA = ",";
    public static final String POS_PERIOD = ".";
    public static final String POS_LQ = "``";
    public static final String POS_RQ = "''";
    public static final String POS_LRB = "-LRB-";
    public static final String POS_RRB = "-RRB-";
    public static final String POS_HYPH = "HYPH";
    public static final String POS_NFP = "NFP";
    public static final String POS_SYM = "SYM";
    public static final String POS_PUNC = "PUNC";
    public static final String FTAG_ADV = "ADV";
    public static final String FTAG_BNF = "BNF";
    public static final String FTAG_CLF = "CLF";
    public static final String FTAG_CLR = "CLR";
    public static final String FTAG_DIR = "DIR";
    public static final String FTAG_DTV = "DTV";
    public static final String FTAG_ETC = "ETC";
    public static final String FTAG_EXT = "EXT";
    public static final String FTAG_HLN = "HLN";
    public static final String FTAG_IMP = "IMP";
    public static final String FTAG_LGS = "LGS";
    public static final String FTAG_LOC = "LOC";
    public static final String FTAG_MNR = "MNR";
    public static final String FTAG_NOM = "NOM";
    public static final String FTAG_PRD = "PRD";
    public static final String FTAG_PRP = "PRP";
    public static final String FTAG_PUT = "PUT";
    public static final String FTAG_SBJ = "SBJ";
    public static final String FTAG_SEZ = "SEZ";
    public static final String FTAG_TMP = "TMP";
    public static final String FTAG_TPC = "TPC";
    public static final String FTAG_TTL = "TTL";
    public static final String FTAG_UNF = "UNF";
    public static final String FTAG_VOC = "VOC";
    public static final String EC_EXP = "*EXP*";
    public static final String EC_ESM = "*?*";
    public static final String EC_ICH = "*ICH*";
    public static final String EC_NOT = "*NOT*";
    public static final String EC_ZERO = "0";
    public static final String EC_PPA = "*PPA*";
    public static final String EC_PRO = "*PRO*";
    public static final String EC_RNR = "*RNR*";
    public static final String EC_NULL = "*";
    public static final String EC_TRACE = "*T*";
    public static final String EC_UNIT = "*U*";
    public static final Pattern RE_COMP_POS = Pattern.compile("^(WDT|WP.*|WRB)$");
    public static final Pattern RE_COMP_FORM = Pattern.compile("^(how|however|that|what|whatever|whatsoever|when|whenever|where|whereby|wherein|whereupon|wherever|which|whichever|whither|who|whoever|whom|whose|why)$");
    public static final Pattern RE_NULL = Pattern.compile("^(\\*|\\*-.+)$");
    public static final Pattern RE_COMP_LINK = Pattern.compile("^(WHNP|WHPP|WHADVP)$");
    public static final Pattern RE_COMP_LINK_FORM = Pattern.compile("^(0|that|when|where|whereby|wherein|whereupon|which|who|whom|whose)$");
    public static final Pattern RE_ICH_PPA_RNR = Pattern.compile("\\*(ICH|PPA|RNR)\\*.*");

    public static void preprocessTree(CTTree tree) {
        CTLibEn.fixFunctionTags(tree);
        CTLibEn.linkReducedPassiveNulls(tree);
        CTLibEn.linkComplementizers(tree);
    }

    public static boolean isPassiveNull(CTNode node) {
        return node.isEmptyCategory() && RE_NULL.matcher(node.form).find() && node.parent != null && (node = node.parent).isPTag(PTAG_NP) && node.s_fTags.isEmpty() && node.parent != null && node.parent.isPTag(PTAG_VP) && node.i_siblingId > 0 && node.parent.getChild(node.i_siblingId - 1).isPTagAny(POS_VBN, POS_VBD);
    }

    public static boolean isPassiveVerb(CTNode node) {
        if (!node.isPTag(POS_VBN)) {
            return false;
        }
        CTNode tmp = node.getParent();
        if (tmp == null || !tmp.isPTag(PTAG_VP)) {
            return false;
        }
        if ((tmp = tmp.getParent()) == null || !tmp.isPTag(PTAG_VP)) {
            return true;
        }
        if ((tmp = tmp.getFirstChild("+VB.*")) != null) {
            return !MPLibEn.isHave(tmp.form);
        }
        return true;
    }

    public static boolean isComplementizer(CTNode node) {
        if (node.isPhrase()) {
            return false;
        }
        return RE_COMP_POS.matcher(node.pTag).find() || node.isPTag("-NONE-") && node.isForm(EC_ZERO);
    }

    public static CTNode getComplementizer(CTNode node) {
        if (!node.pTag.startsWith("WH")) {
            return null;
        }
        List<CTNode> terminals = node.getSubTerminals();
        if (node.isEmptyCategoryRec()) {
            return terminals.get(0);
        }
        for (CTNode term : terminals) {
            if (!RE_COMP_POS.matcher(term.pTag).find()) continue;
            return term;
        }
        for (CTNode term : terminals) {
            if (!RE_COMP_FORM.matcher(term.form.toLowerCase()).find()) continue;
            return term;
        }
        return null;
    }

    public static void linkReducedPassiveNulls(CTTree tree) {
        CTLibEn.linkReducedPassiveNullsAux(tree, tree.getRoot());
    }

    private static void linkReducedPassiveNullsAux(CTTree tree, CTNode curr) {
        String npRegex = "NP|NML";
        String vpRegex = "VP|RRC|UCP";
        if (CTLibEn.isPassiveNull(curr) && curr.form.equals(EC_NULL)) {
            List<CTNode> list;
            CTNode parent = curr.parent;
            if (parent.parent.coIndex != -1 && (list = tree.getCoIndexedEmptyCategories(parent.parent.coIndex)) != null) {
                parent = list.get(0);
            }
            CTNode vp = parent.getHighestChainedAncestor("+" + vpRegex);
            if (vp.parent.matchesPTag(npRegex) || vp.parent.hasFTag(FTAG_NOM)) {
                curr.antecedent = vp.getPrevSibling("+" + npRegex);
                if (curr.antecedent == null) {
                    curr.antecedent = vp.getPrevSibling("+NN.*");
                }
                if (curr.antecedent == null) {
                    curr.antecedent = vp.getPrevSibling(PTAG_QP);
                }
                if (curr.antecedent == null) {
                    curr.antecedent = vp.getPrevSibling("-NOM");
                }
            } else if (vp.parent.matchesPTag("S.*")) {
                curr.antecedent = vp.getPrevSibling(PTAG_NP, "-SBJ");
                if (curr.antecedent == null) {
                    curr.antecedent = vp.getNextSibling(PTAG_NP, "-SBJ");
                }
            }
        }
        for (CTNode child : curr.ls_children) {
            CTLibEn.linkReducedPassiveNullsAux(tree, child);
        }
    }

    public static void linkComplementizers(CTTree tree) {
        CTLibEn.linkComlementizersAux(tree, tree.getRoot());
    }

    private static void linkComlementizersAux(CTTree tree, CTNode curr) {
        if (RE_COMP_LINK.matcher(curr.pTag).find()) {
            CTNode comp = CTLibEn.getComplementizer(curr);
            CTNode sbar = curr.getHighestChainedAncestor(PTAG_SBAR);
            if (comp != null && sbar != null && !sbar.hasFTag(FTAG_NOM) && RE_COMP_LINK_FORM.matcher(comp.form.toLowerCase()).find()) {
                CTNode ante;
                CTNode p;
                if (sbar.coIndex != -1) {
                    List<CTNode> ecs = tree.getCoIndexedEmptyCategories(sbar.coIndex);
                    if (ecs != null) {
                        for (CTNode ec : ecs) {
                            if (!ec.form.startsWith(EC_ICH) || !ec.parent.isPTag(PTAG_SBAR)) continue;
                            sbar = ec.parent;
                            break;
                        }
                    }
                } else if (sbar.parent != null && sbar.parent.isPTag(PTAG_UCP)) {
                    sbar = sbar.getParent();
                }
                if ((p = sbar.parent) == null) {
                    return;
                }
                if (p.isPTag(PTAG_NP)) {
                    ante = sbar.getPrevSibling(PTAG_NP);
                    if (ante != null) {
                        comp.antecedent = ante;
                    }
                } else if (p.isPTag(PTAG_ADVP)) {
                    ante = sbar.getPrevSibling(PTAG_ADVP);
                    if (ante != null) {
                        comp.antecedent = ante;
                    }
                } else if (p.isPTag(PTAG_VP) && (ante = sbar.getPrevSibling("-PRD")) != null && (sbar.hasFTag(FTAG_CLF) || curr.isPTag(PTAG_WHNP) && ante.isPTag(PTAG_NP) || curr.isPTag(PTAG_WHPP) && ante.isPTag(PTAG_PP) || curr.isPTag(PTAG_WHADVP) && ante.isPTag(PTAG_ADVP))) {
                    comp.antecedent = ante;
                }
                ante = comp.antecedent;
                while (ante != null && ante.isEmptyCategoryRec()) {
                    ante = ante.getSubTerminals().get(0).getAntecedent();
                }
                comp.antecedent = ante;
            }
        } else {
            for (CTNode child : curr.ls_children) {
                CTLibEn.linkComlementizersAux(tree, child);
            }
        }
    }

    public static boolean containsCoordination(CTNode node) {
        return CTLibEn.containsCoordination(node, node.getChildren());
    }

    public static boolean containsCoordination(CTNode parent, List<CTNode> siblings) {
        if (parent.isPTag(PTAG_UCP)) {
            return true;
        }
        if (parent.isPTagAny(PTAG_NML, PTAG_NP) && CTLibEn.containsEtc(siblings)) {
            return true;
        }
        for (CTNode child : siblings) {
            if (!child.isPTagAny(POS_CC, PTAG_CONJP)) continue;
            return true;
        }
        return false;
    }

    private static boolean containsEtc(List<CTNode> children) {
        int size = children.size();
        for (int i = size - 1; i > 0; --i) {
            CTNode child = children.get(i);
            if (CTLibEn.isPunctuation(child)) continue;
            if (!CTLibEn.isEtc(child)) break;
            return true;
        }
        return false;
    }

    public static boolean isEtc(CTNode node) {
        if (node.hasFTag(FTAG_ETC)) {
            return true;
        }
        return node.getSubTerminals().get((int)0).form.equalsIgnoreCase("etc.");
    }

    public static boolean isCoordinator(CTNode node) {
        return CTLibEn.isConjunction(node) || CTLibEn.isSeparator(node);
    }

    public static boolean isConjunction(CTNode node) {
        return node.isPTag(POS_CC) || node.isPTag(PTAG_CONJP);
    }

    public static boolean isSeparator(CTNode node) {
        return node.isPTag(POS_COMMA) || node.isPTag(POS_COLON);
    }

    public static boolean isCorrelativeConjunction(CTNode node) {
        if (node.isPTag(POS_CC)) {
            String form = node.form.toLowerCase();
            return form.equals("either") || form.equals("neither") || form.equals("whether") || form.equals("both");
        }
        if (node.isPTag(PTAG_CONJP)) {
            String form = node.toForms(false, " ").toLowerCase();
            return form.equals("not only");
        }
        return false;
    }

    public static boolean isPunctuation(CTNode node) {
        return node.isPTagAny(POS_COLON, POS_COMMA, POS_PERIOD, POS_LQ, POS_RQ, POS_LRB, POS_RRB, POS_HYPH, POS_NFP, POS_SYM, POS_PUNC);
    }

    public static boolean isClause(CTNode node) {
        return node.isPTagAny(PTAG_S, PTAG_SQ, PTAG_SINV, PTAG_SBAR, PTAG_SBARQ);
    }

    public static boolean isAdjective(CTNode node) {
        return MPLibEn.isAdjective(node.pTag);
    }

    public static boolean isAdverb(CTNode node) {
        return MPLibEn.isAdverb(node.pTag);
    }

    public static boolean isNoun(CTNode node) {
        return MPLibEn.isNoun(node.pTag);
    }

    public static boolean isVerb(CTNode node) {
        return MPLibEn.isVerb(node.pTag);
    }

    public static boolean isNounPhrase(CTNode node) {
        return node.isPTagAny(PTAG_NP, PTAG_NML, PTAG_NX, PTAG_NAC);
    }

    public static boolean isLeftBracket(CTNode node) {
        return !node.isPhrase() && node.form.matches("^-L(R|S|C)B-$");
    }

    public static boolean isRightBracket(CTNode node) {
        return !node.isPhrase() && node.form.matches("^-R(R|S|C)B-$");
    }

    public static void fixFunctionTags(CTTree tree) {
        CTLibEn.fixFunctionTagsAux(tree.getRoot());
    }

    private static void fixFunctionTagsAux(CTNode node) {
        CTLibEn.fixSBJ(node);
        CTLibEn.fixLGS(node);
        CTLibEn.fixCLF(node);
        for (CTNode child : node.getChildren()) {
            CTLibEn.fixFunctionTagsAux(child);
        }
    }

    private static boolean fixSBJ(CTNode node) {
        CTNode parent;
        if (node.hasFTag(FTAG_SBJ) && (parent = node.getParent()).getChildrenSize() == 1 && !parent.isPTagAny(PTAG_EDITED, PTAG_EMBED) && parent.getFTags().isEmpty()) {
            node.removeFTag(FTAG_SBJ);
            parent.addFTag(FTAG_SBJ);
            parent.pTag = node.pTag;
            return true;
        }
        return false;
    }

    private static boolean fixLGS(CTNode node) {
        CTNode parent;
        if (node.hasFTag(FTAG_LGS) && !node.isPTag(PTAG_PP) && (parent = node.getParent()).isPTagAny(PTAG_PP, PTAG_SBAR)) {
            node.removeFTag(FTAG_LGS);
            parent.addFTag(FTAG_LGS);
            return true;
        }
        return false;
    }

    private static boolean fixCLF(CTNode node) {
        if (node.hasFTag(FTAG_CLF) && node.matchesPTag("S|SQ|SINV")) {
            CTNode desc = node.getFirstDescendant("+SBAR.*");
            node.removeFTag(FTAG_CLF);
            if (desc != null) {
                desc.addFTag(FTAG_CLF);
                return true;
            }
        }
        return false;
    }
}

