/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.assertion.medfacts.cleartk;

import java.io.File;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import org.apache.commons.io.FilenameUtils;
import org.apache.ctakes.assertion.attributes.features.selection.FeatureSelection;
import org.apache.ctakes.assertion.medfacts.cleartk.extractors.FedaFeatureFunction;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ConfigurationParameterFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceCreationSpecifier;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.ml.CleartkAnnotator;
import org.cleartk.ml.Feature;
import org.cleartk.ml.Instance;
import org.cleartk.ml.TreeFeature;
import org.cleartk.ml.feature.extractor.CleartkExtractor;
import org.cleartk.ml.feature.extractor.CoveredTextExtractor;
import org.cleartk.ml.feature.extractor.FeatureExtractor1;
import org.cleartk.ml.feature.function.FeatureFunctionExtractor;

/**
 * Abstract ClearTK annotator that builds one {@link Instance} per
 * {@link EntityMention} / {@link EventMention} and hands it to the subclass
 * for labelling via {@link #setClassLabel(IdentifiedAnnotation, Instance)}.
 *
 * <p>In training mode the mentions are read from the view named by
 * {@link #goldViewName} and every labelled instance that survives sampling is
 * written through the inherited data writer. Outside training mode the
 * mentions are read from the CAS passed to {@link #process(JCas)}.
 *
 * <p>When a file-to-domain map is configured, a {@link FedaFeatureFunction}
 * is created and used to add domain-specific copies of the features produced
 * here.
 */
public abstract class AssertionCleartkAnalysisEngine
extends CleartkAnnotator<String> {
    /** Cue features are only emitted when fewer than this many tokens separate cue and mention. */
    private static final int CLOSEST_CUE_DISTANCE_LIMIT = 21;
    /**
     * Type id that triggers the {@code ENTITY_TYPE_ANAT_SITE} feature.
     * NOTE(review): presumably the cTAKES anatomical-site NE type id - confirm against the type-id constants.
     */
    private static final int ANATOMICAL_SITE_TYPE_ID = 6;

    Logger logger = Logger.getLogger(AssertionCleartkAnalysisEngine.class);
    public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";
    /** Not read or written anywhere in this class. */
    public static int relationId;
    public static final String FILE_TO_DOMAIN_MAP = "mapTrainFileToDomain";
    @ConfigurationParameter(name="GoldViewName", mandatory=false, description="view containing the manual identified annotations (especially EntityMention and EventMention annotations); needed for training")
    protected String goldViewName;
    public static final String PARAM_PRINT_ERRORS = "PrintErrors";
    @ConfigurationParameter(name="PrintErrors", mandatory=false, description="Print errors true/false", defaultValue={"false"})
    boolean printErrors;
    public static final String PARAM_PROBABILITY_OF_KEEPING_DEFAULT_EXAMPLE = "ProbabilityOfKeepingADefaultExample";
    /** Not read in this class; available to subclasses. */
    @ConfigurationParameter(name="ProbabilityOfKeepingADefaultExample", mandatory=false, description="probability that a default example should be retained for training")
    protected double probabilityOfKeepingADefaultExample = 1.0;
    public static final String PARAM_PORTION_OF_DATA_TO_USE = "PortionOfDataToUse";
    /** A labelled training instance is written only when {@code coin.nextDouble()} is below this value. */
    @ConfigurationParameter(name="PortionOfDataToUse", mandatory=false, description="How much data to actually use during training (e.g. for building learning curves)")
    protected double portionOfDataToUse = 1.0;
    public static final String PARAM_FEATURE_SELECTION_THRESHOLD = "WhetherToDoFeatureSelection";
    @ConfigurationParameter(name="WhetherToDoFeatureSelection", mandatory=false, description="the Chi-squared threshold at which features should be removed")
    protected Float featureSelectionThreshold = Float.valueOf(0.0f);
    public static final String PARAM_FEATURE_CONFIG = "FEATURE_CONFIG";
    @ConfigurationParameter(name="FEATURE_CONFIG", description="Feature configuration to use (for experiments)", mandatory=false)
    protected FEATURE_CONFIG featConfig = FEATURE_CONFIG.ALL_SYN;
    public static final String PARAM_FEATURE_SELECTION_URI = "FeatureSelectionURI";
    @ConfigurationParameter(mandatory=false, name="FeatureSelectionURI", description="provides a URI where the feature selection data will be written")
    protected URI featureSelectionURI;
    /** Shared sampler with a fixed seed (0), used to sub-sample training instances. */
    protected static Random coin = new Random(0L);
    protected static final String FEATURE_SELECTION_NAME = "SelectNeighborFeatures";
    /** Directories separated by {@code ;} or {@code :}; parsed in {@link #initialize(UimaContext)}. */
    @ConfigurationParameter(name="mapTrainFileToDomain", mandatory=false, description="a map of filenames to their respective domains (i.e., directories that contain them)")
    protected String fileDomainMap;
    /** Maps a file name without extension to the domain id derived from its directory. */
    protected Map<String, String> fileToDomain = new HashMap<String, String>();
    /** Reset to {@code "<BEGIN>"} at the start of every {@link #process(JCas)} call. */
    protected String lastLabel;
    protected List<CleartkExtractor<IdentifiedAnnotation, BaseToken>> contextFeatureExtractors;
    protected List<CleartkExtractor<IdentifiedAnnotation, BaseToken>> tokenContextFeatureExtractors;
    protected List<CleartkExtractor<IdentifiedAnnotation, BaseToken>> tokenCleartkExtractors;
    protected List<FeatureExtractor1<IdentifiedAnnotation>> entityFeatureExtractors;
    protected List<FeatureExtractor1<IdentifiedAnnotation>> entityTreeExtractors;
    protected CleartkExtractor<IdentifiedAnnotation, BaseToken> cuePhraseInWindowExtractor;
    /** Never assigned in this class; expected to be populated by subclasses when domain adaptation is used. */
    protected List<FeatureFunctionExtractor<IdentifiedAnnotation>> featureFunctionExtractors;
    /** Created in {@link #initialize(UimaContext)} only when {@link #fileToDomain} is non-empty. */
    protected FedaFeatureFunction ffDomainAdaptor;
    protected FeatureSelection<String> featureSelection;

    /**
     * Sets the outcome of {@code var2} for the mention {@code var1}.
     *
     * @param var1 the entity or event mention being classified
     * @param var2 the instance holding the features extracted for the mention
     * @throws AnalysisEngineProcessException if labelling fails
     */
    public abstract void setClassLabel(IdentifiedAnnotation var1, Instance<String> var2) throws AnalysisEngineProcessException;

    /**
     * Hook for subclasses to set up feature selection.
     *
     * @throws ResourceInitializationException if feature selection cannot be initialized
     */
    protected abstract void initializeFeatureSelection() throws ResourceInitializationException;

    /**
     * Builds the file-to-domain map from {@link #fileDomainMap}, validates the
     * training configuration and creates the extractors owned by this class.
     *
     * @param context the UIMA context passed on to the superclass
     * @throws ResourceInitializationException if the superclass initialization fails
     * @throws IllegalArgumentException if the annotator is training and no gold view name is set
     */
    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        if (this.fileDomainMap != null) {
            for (String dir : this.fileDomainMap.split("[;:]")) {
                String domainId = AssertionCleartkAnalysisEngine.normalizeToDomain(dir);
                // listFiles() yields null for anything that is not a readable directory.
                // List once so the null check and the iteration see the same snapshot.
                File[] files = new File(dir).listFiles();
                if (files == null) {
                    continue;
                }
                for (File file : files) {
                    this.fileToDomain.put(FilenameUtils.removeExtension(file.getName()), domainId);
                }
            }
        }
        if (this.isTraining() && this.goldViewName == null) {
            throw new IllegalArgumentException("GoldViewName must be defined during training");
        }
        this.entityFeatureExtractors = new ArrayList<FeatureExtractor1<IdentifiedAnnotation>>();
        this.tokenCleartkExtractors = new ArrayList<CleartkExtractor<IdentifiedAnnotation, BaseToken>>();
        CleartkExtractor<IdentifiedAnnotation, BaseToken> tokenExtraction1 = new CleartkExtractor<IdentifiedAnnotation, BaseToken>(
                BaseToken.class,
                new CoveredTextExtractor<BaseToken>(),
                new CleartkExtractor.LastCovered(2),
                new CleartkExtractor.Preceding(5),
                new CleartkExtractor.Following(4),
                new CleartkExtractor.Bag(new CleartkExtractor.Preceding(3)),
                new CleartkExtractor.Bag(new CleartkExtractor.Following(3)),
                new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
                new CleartkExtractor.Bag(new CleartkExtractor.Following(5)),
                new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
                new CleartkExtractor.Bag(new CleartkExtractor.Following(10)));
        this.tokenCleartkExtractors.add(tokenExtraction1);
        this.cuePhraseInWindowExtractor = new CleartkExtractor<IdentifiedAnnotation, BaseToken>(
                BaseToken.class,
                new CoveredTextExtractor<BaseToken>(),
                new CleartkExtractor.Bag(new CleartkExtractor.Covered()));
        if (!this.fileToDomain.isEmpty()) {
            // De-duplicate the domain ids before handing them to the adaptor.
            this.ffDomainAdaptor = new FedaFeatureFunction(new ArrayList<String>(new HashSet<String>(this.fileToDomain.values())));
        }
        this.entityTreeExtractors = new ArrayList<FeatureExtractor1<IdentifiedAnnotation>>();
    }

    /**
     * Extracts features for every entity/event mention, labels the resulting
     * instance and, in training mode, writes it to the data writer.
     *
     * @param jCas the CAS to process; in training mode mentions come from the gold view of this CAS
     * @throws AnalysisEngineProcessException if the gold view cannot be obtained, or labelling or writing fails
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        String documentId = DocumentIDAnnotationUtil.getDocumentID(jCas);
        if (documentId != null) {
            this.logger.debug("processing next doc: " + documentId);
            // The adaptor only exists when the map was non-empty at initialize() time; a map
            // installed later through setTrainFileToDomain must not cause a null dereference.
            if (this.isDomainAdaptationActive()) {
                // A document missing from the map yields a null domain, passed through unchanged.
                this.ffDomainAdaptor.setDomain(this.fileToDomain.get(documentId));
            }
        } else {
            this.logger.debug("processing next doc (doc id is null)");
        }
        this.lastLabel = "<BEGIN>";
        JCas identifiedAnnotationView = this.selectAnnotationView(jCas);
        Collection<IdentifiedAnnotation> entities = JCasUtil.select(identifiedAnnotationView, IdentifiedAnnotation.class);
        for (IdentifiedAnnotation mention : entities) {
            if (!(mention instanceof EntityMention) && !(mention instanceof EventMention)) {
                continue;
            }
            if (mention.getPolarity() == -1) {
                this.logger.debug(String.format(" - identified annotation: [%d-%d] polarity %d (%s)", mention.getBegin(), mention.getEnd(), mention.getPolarity(), mention.getClass().getName()));
            }
            Instance<String> instance = new Instance<String>();
            // Sentences are looked up in the CAS passed in, not in the gold view.
            List<Sentence> sentences = JCasUtil.selectCovering(jCas, Sentence.class, mention.getBegin(), mention.getEnd());
            Sentence coveringSent = sentences.isEmpty() ? null : sentences.get(0);

            if (this.ffDomainAdaptor == null) {
                for (CleartkExtractor<IdentifiedAnnotation, BaseToken> extractor : this.tokenCleartkExtractors) {
                    if (coveringSent != null) {
                        instance.addAll(extractor.extractWithin(identifiedAnnotationView, mention, coveringSent));
                    } else {
                        instance.addAll(extractor.extract(identifiedAnnotationView, mention));
                    }
                }
            }
            if (coveringSent != null) {
                this.addClosestCueFeatures(instance, mention, coveringSent);
            }
            if (mention.getTypeID() == ANATOMICAL_SITE_TYPE_ID) {
                instance.add(new Feature((Object)"ENTITY_TYPE_ANAT_SITE"));
                if (this.isDomainAdaptationActive()) {
                    instance.addAll(this.ffDomainAdaptor.apply(new Feature((Object)"ENTITY_TYPE_ANAT_SITE")));
                }
            }
            if (this.ffDomainAdaptor == null) {
                for (FeatureExtractor1<IdentifiedAnnotation> extractor : this.entityFeatureExtractors) {
                    instance.addAll(extractor.extract(jCas, mention));
                }
            }
            for (FeatureExtractor1<IdentifiedAnnotation> extractor : this.entityTreeExtractors) {
                instance.addAll(extractor.extract(jCas, mention));
            }

            // Lower-case the features gathered so far; features added below keep their case.
            List<Feature> feats = instance.getFeatures();
            for (Feature feature : feats) {
                if (AssertionCleartkAnalysisEngine.isCaseSensitiveFeature(feature) || !(feature.getValue() instanceof String)) {
                    continue;
                }
                // Locale.ROOT keeps the mapping stable regardless of the JVM default locale.
                feature.setValue(((String)feature.getValue()).toLowerCase(Locale.ROOT));
            }
            // featureFunctionExtractors is populated by subclasses, so tolerate it being unset.
            if (this.isDomainAdaptationActive() && this.featureFunctionExtractors != null) {
                for (FeatureFunctionExtractor<IdentifiedAnnotation> extractor : this.featureFunctionExtractors) {
                    instance.addAll(extractor.extract(jCas, mention));
                }
            }

            this.setClassLabel(mention, instance);
            if (!this.isTraining()) {
                continue;
            }
            if (this.featureSelection != null) {
                feats = this.featureSelection.transform(feats);
            }
            // The coin is only flipped for labelled instances, which keeps the sampling sequence stable.
            if (instance.getOutcome() == null || !(coin.nextDouble() < this.portionOfDataToUse)) {
                continue;
            }
            this.dataWriter.write(new Instance<String>(instance.getOutcome(), feats));
        }
    }

    /** Returns the gold view while training, otherwise the CAS itself. */
    private JCas selectAnnotationView(JCas jCas) throws AnalysisEngineProcessException {
        if (!this.isTraining()) {
            return jCas;
        }
        try {
            return jCas.getView(this.goldViewName);
        }
        catch (CASException e) {
            throw new AnalysisEngineProcessException((Throwable)e);
        }
    }

    /** True when a domain map is present and the domain adaptor has been created. */
    private boolean isDomainAdaptationActive() {
        return this.fileToDomain != null && !this.fileToDomain.isEmpty() && this.ffDomainAdaptor != null;
    }

    /**
     * Adds word/family/category features for the cue phrase in {@code sentence}
     * that has the fewest tokens between itself and {@code mention}; ties keep the first cue found.
     */
    private void addClosestCueFeatures(Instance<String> instance, IdentifiedAnnotation mention, Sentence sentence) {
        AssertionCuePhraseAnnotation closestCue = null;
        int closest = Integer.MAX_VALUE;
        for (AssertionCuePhraseAnnotation cue : JCasUtil.selectCovered(AssertionCuePhraseAnnotation.class, (AnnotationFS)sentence)) {
            int distance = JCasUtil.selectBetween(BaseToken.class, (AnnotationFS)cue, (AnnotationFS)mention).size();
            if (distance < closest) {
                closestCue = cue;
                closest = distance;
            }
        }
        if (closestCue == null || closest >= CLOSEST_CUE_DISTANCE_LIMIT) {
            return;
        }
        String word = closestCue.getCoveredText();
        String family = closestCue.getCuePhraseAssertionFamily();
        String category = closestCue.getCuePhraseCategory();
        instance.add(new Feature("ClosestCue_Word", (Object)word));
        instance.add(new Feature("ClosestCue_PhraseFamily", (Object)family));
        instance.add(new Feature("ClosestCue_PhraseCategory", (Object)category));
        if (this.isDomainAdaptationActive()) {
            // Fresh Feature objects are handed to the adaptor, after the plain features.
            instance.addAll(this.ffDomainAdaptor.apply(new Feature("ClosestCue_Word", (Object)word)));
            instance.addAll(this.ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseFamily", (Object)family)));
            instance.addAll(this.ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseCategory", (Object)category)));
        }
    }

    /** True for tree features and for features whose name marks them as TreeFrag/WORD/NEG features. */
    private static boolean isCaseSensitiveFeature(Feature feature) {
        if (feature instanceof TreeFeature) {
            return true;
        }
        String name = feature.getName();
        if (name == null) {
            return false;
        }
        return name.startsWith("TreeFrag") || name.startsWith("WORD") || name.startsWith("NEG")
                || name.contains("_TreeFrag") || name.contains("_WORD") || name.contains("_NEG");
    }

    /**
     * Creates an engine description for this class and applies any extra configuration.
     *
     * @param additionalConfiguration configuration parameter name/value data to add; may be empty or null
     * @return the engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription getDescription(Object ... additionalConfiguration) throws ResourceInitializationException {
        AnalysisEngineDescription desc = AnalysisEngineFactory.createEngineDescription(AssertionCleartkAnalysisEngine.class, (Object[])new Object[0]);
        // A caller passing an explicit null array must not trigger a NullPointerException.
        if (additionalConfiguration != null && additionalConfiguration.length > 0) {
            ConfigurationParameterFactory.addConfigurationParameters((ResourceCreationSpecifier)desc, (Object[])additionalConfiguration);
        }
        return desc;
    }

    /**
     * @return the live file-name-to-domain map used by this annotator
     */
    public Map<String, String> getTrainFileToDomain() {
        return this.fileToDomain;
    }

    /**
     * Replaces the file-name-to-domain map. The domain adaptor is not rebuilt here.
     *
     * @param trainFileToDomain the new map; {@code null} is treated as an empty map
     */
    public void setTrainFileToDomain(Map<String, String> trainFileToDomain) {
        this.fileToDomain = trainFileToDomain != null ? trainFileToDomain : new HashMap<String, String>();
    }

    /**
     * Derives a domain id from a directory path: the last {@code /}-separated
     * segment that is non-empty and does not start with "test", "train" or
     * "dev" (case-insensitive).
     *
     * @param dir the directory path; must not be null
     * @return the selected segment, or {@code dir} itself when no segment qualifies
     */
    public static String normalizeToDomain(String dir) {
        String[] parts = dir.split("/");
        for (int i = parts.length - 1; i >= 0; --i) {
            String part = parts[i];
            // Leading or doubled slashes produce empty segments, which are not domain names.
            if (part.isEmpty()) {
                continue;
            }
            String lowered = part.toLowerCase(Locale.ROOT);
            if (lowered.startsWith("test") || lowered.startsWith("train") || lowered.startsWith("dev")) {
                continue;
            }
            return part;
        }
        return dir;
    }

    /** Feature configurations selectable through the {@code FEATURE_CONFIG} parameter. */
    public static enum FEATURE_CONFIG {
        NO_SEM,
        NO_SYN,
        STK,
        STK_FRAGS,
        PTK,
        PTK_FRAGS,
        DEP_REGEX,
        DEP_REGEX_FRAGS,
        ALL_SYN;

    }
}

