/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.lvg.ae;

import gov.nih.nlm.nls.lvg.Api.LvgCmdApi;
import gov.nih.nlm.nls.lvg.Api.LvgLexItemApi;
import gov.nih.nlm.nls.lvg.Lib.Category;
import gov.nih.nlm.nls.lvg.Lib.LexItem;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.Vector;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.ListFactory;
import org.apache.ctakes.lvg.resource.LvgCmdApiResource;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.Lemma;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.cas.FSList;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.resource.ResourceInitializationException;

@PipeBitInfo(name="LVG Basetoken Annotator", description="Adds cononical form of Base Tokens.", dependencies={PipeBitInfo.TypeProduct.SECTION, PipeBitInfo.TypeProduct.BASE_TOKEN})
public class LvgBaseTokenAnnotator
extends JCasAnnotator_ImplBase {
    /** Configuration parameter name: whether lemma entries are attached to each token. */
    public static final String PARAM_POST_LEMMAS = "PostLemmas";
    /** Configuration parameter name: whether the lemma cache file is consulted before LVG. */
    public static final String PARAM_USE_LEMMA_CACHE = "UseLemmaCache";
    /** Configuration parameter name: classpath location of the lemma cache file. */
    public static final String PARAM_LEMMA_CACHE_FILE_LOCATION = "LemmaCacheFileLocation";
    /** Configuration parameter name: frequency a lemma cache line must exceed to be loaded. */
    public static final String PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF = "LemmaCacheFrequencyCutoff";

    private final Logger logger = Logger.getLogger(this.getClass().getName());
    /** Key of the external resource that supplies the LVG APIs. */
    private final String LVGCMDAPI_RESRC_KEY = "LvgCmdApi";

    /** LVG command API used to compute normalized forms. */
    private LvgCmdApi lvgCmd;
    /** LVG lex-item API used to compute lemmas; only obtained when {@link #postLemmas} is set. */
    private LvgLexItemApi lvgLexItem;
    private UimaContext context;

    /** When true, only tokens inside non-skipped segments are processed. */
    private boolean useSegments;
    /** Segment ids that are not processed when {@link #useSegments} is set. */
    private Set<String> skipSegmentsSet;

    /** When true, normalized forms are looked up in {@link #normCacheMap} before calling LVG. */
    private boolean useCmdCache;
    /** Classpath location of the normalization cache file. */
    private String cmdCacheFileLocation;
    /** A normalization cache line is loaded only if its frequency exceeds this value. */
    private int cmdCacheFreqCutoff;

    /** Maps an LVG category name to the part-of-speech tag stored on lemmas. */
    private Map<String, String> xeroxTreebankMap;

    private boolean postLemmas;
    /** When true, lemmas are looked up in {@link #lemmaCacheMap} before calling LVG. */
    private boolean useLemmaCache;
    private String lemmaCacheFileLocation;
    /** A lemma cache line is loaded only if its frequency exceeds this value. */
    private int lemmaCacheFreqCutoff;

    /** Original word -> normalized word, filled from the normalization cache file. */
    private Map<String, String> normCacheMap;
    /** Original word -> cached lemma entries, filled from the lemma cache file. */
    private Map<String, Set<LemmaLocalClass>> lemmaCacheMap;
    /** Token texts that are never normalized or lemmatized. */
    private Set<String> exclusionSet;

    /**
     * Prepares the annotator for use.
     * <p>
     * Stores the context, reads the configuration through {@code configInit()}, then
     * fetches the LVG resource registered under the {@code LvgCmdApi} key. The
     * normalization cache is loaded when the command cache is enabled. When lemmas are
     * posted, the lex-item API is obtained and the lemma cache is loaded if enabled.
     *
     * @param aContext the UIMA context giving access to parameters and resources
     * @throws ResourceInitializationException if the superclass or configuration step
     *         fails, or wrapping any exception raised while locating the resource or
     *         loading a cache file
     */
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.context = aContext;
        this.configInit();
        try {
            LvgCmdApiResource resource = (LvgCmdApiResource)this.context.getResourceObject(LVGCMDAPI_RESRC_KEY);
            if (resource == null) {
                Exception missing = new Exception("Unable to locate resource with key=LvgCmdApi.");
                throw new AnnotatorInitializationException((Throwable)missing);
            }
            this.lvgCmd = resource.getLvg();

            if (this.useCmdCache) {
                this.logger.info("Loading Cmd cache=" + this.cmdCacheFileLocation);
                this.loadCmdCacheFile(this.cmdCacheFileLocation);
                this.logger.info("Loaded " + this.normCacheMap.size() + " entries");
            }

            if (this.postLemmas) {
                this.lvgLexItem = resource.getLvgLex();
            }
            if (this.postLemmas && this.useLemmaCache) {
                this.logger.info("Loading Lemma cache=" + this.lemmaCacheFileLocation);
                this.loadLemmaCacheFile(this.lemmaCacheFileLocation);
                this.logger.info("Loaded " + this.lemmaCacheMap.size() + " entries");
            }
        }
        catch (Exception cause) {
            // Includes the AnnotatorInitializationException thrown above.
            throw new ResourceInitializationException((Throwable)cause);
        }
    }

    /**
     * Reads the annotator's configuration parameters from the UIMA context into fields.
     * <p>
     * Parameters read: {@code UseSegments}, {@code SegmentsToSkip},
     * {@code XeroxTreebankMap} (entries of the form {@code lvgCategory|posTag}; entries
     * that do not have exactly two parts are ignored), {@code UseCmdCache},
     * {@code CmdCacheFileLocation}, {@code CmdCacheFrequencyCutoff}, {@code ExclusionSet},
     * {@code PostLemmas} and, only when lemmas are posted, {@code UseLemmaCache},
     * {@code LemmaCacheFileLocation} and {@code LemmaCacheFrequencyCutoff}.
     * <p>
     * {@code UseSegments}, {@code UseCmdCache} and {@code CmdCacheFrequencyCutoff} are
     * unboxed directly and therefore must be set. The three array parameters may be
     * absent and are then treated as empty.
     *
     * @throws ResourceInitializationException if the lemma cache is enabled but
     *         {@code LemmaCacheFileLocation} is not set
     */
    private void configInit() throws ResourceInitializationException {
        this.useSegments = (Boolean)this.context.getConfigParameterValue("UseSegments");

        this.skipSegmentsSet = new HashSet<String>();
        String[] skipSegmentIDs = (String[])this.context.getConfigParameterValue("SegmentsToSkip");
        if (skipSegmentIDs != null) {
            for (String segmentId : skipSegmentIDs) {
                this.skipSegmentsSet.add(segmentId);
            }
        }

        this.xeroxTreebankMap = new HashMap<String, String>();
        String[] xtMaps = (String[])this.context.getConfigParameterValue("XeroxTreebankMap");
        if (xtMaps != null) {
            for (String mapping : xtMaps) {
                StringTokenizer tokenizer = new StringTokenizer(mapping, "|");
                if (tokenizer.countTokens() != 2) {
                    continue;
                }
                String xTag = tokenizer.nextToken();
                String tTag = tokenizer.nextToken();
                this.xeroxTreebankMap.put(xTag, tTag);
            }
        }

        this.useCmdCache = (Boolean)this.context.getConfigParameterValue("UseCmdCache");
        this.cmdCacheFileLocation = (String)this.context.getConfigParameterValue("CmdCacheFileLocation");
        this.cmdCacheFreqCutoff = (Integer)this.context.getConfigParameterValue("CmdCacheFrequencyCutoff");

        this.exclusionSet = new HashSet<String>();
        String[] wordsToExclude = (String[])this.context.getConfigParameterValue("ExclusionSet");
        if (wordsToExclude != null) {
            for (String excluded : wordsToExclude) {
                this.exclusionSet.add(excluded);
            }
        }

        Boolean postLemmasParam = (Boolean)this.context.getConfigParameterValue(PARAM_POST_LEMMAS);
        this.postLemmas = postLemmasParam != null && postLemmasParam.booleanValue();

        // The lemma cache settings are stored in the FIELDS. Previously they were held in
        // same-named locals that shadowed the fields, so initialize() always saw
        // useLemmaCache == false and never loaded the cache.
        this.useLemmaCache = false;
        if (!this.postLemmas) {
            return;
        }
        Boolean useLemmaCacheParam = (Boolean)this.context.getConfigParameterValue(PARAM_USE_LEMMA_CACHE);
        this.useLemmaCache = useLemmaCacheParam != null && useLemmaCacheParam.booleanValue();
        if (!this.useLemmaCache) {
            return;
        }

        this.lemmaCacheFileLocation = (String)this.context.getConfigParameterValue(PARAM_LEMMA_CACHE_FILE_LOCATION);
        if (this.lemmaCacheFileLocation == null) {
            throw new ResourceInitializationException((Throwable)new Exception("Parameter for LemmaCacheFileLocation was not set."));
        }
        Integer cutoffParam = (Integer)this.context.getConfigParameterValue(PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF);
        // 20 is the cutoff used when the parameter is not configured.
        this.lemmaCacheFreqCutoff = cutoffParam == null ? 20 : cutoffParam.intValue();
    }

    /**
     * Annotates the base tokens of one document.
     * <p>
     * Without segments the whole document text is treated as a single range. With
     * segments, each {@link Segment} whose id is not in the skip set is processed as
     * its own range.
     *
     * @param jcas the CAS holding the document text and its annotations
     * @throws AnalysisEngineProcessException wrapping any exception raised while processing
     */
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        this.logger.info(" process(JCas, ResultSpecification)");
        String documentText = jcas.getDocumentText();
        try {
            if (!this.useSegments) {
                this.annotateRange(jcas, documentText, 0, documentText.length());
                return;
            }
            JFSIndexRepository repository = jcas.getJFSIndexRepository();
            for (Object indexed : repository.getAnnotationIndex(Segment.type)) {
                Segment segment = (Segment)indexed;
                if (!this.skipSegmentsSet.contains(segment.getId())) {
                    this.annotateRange(jcas, documentText, segment.getBegin(), segment.getEnd());
                }
            }
        }
        catch (Exception cause) {
            throw new AnalysisEngineProcessException((Throwable)cause);
        }
    }

    /**
     * Processes every {@link BaseToken} that lies completely inside the given range.
     * <p>
     * Tokens whose covered text is in the exclusion set are left untouched. Every other
     * token gets its normalized form set and, when lemmas are posted, its lemma entries.
     *
     * @param jcas the CAS whose base tokens are examined
     * @param text the document text the token offsets refer to
     * @param rangeBegin lowest begin offset a token may have
     * @param rangeEnd highest end offset a token may have
     * @throws AnalysisEngineProcessException if normalizing or lemmatizing a token fails
     */
    protected void annotateRange(JCas jcas, String text, int rangeBegin, int rangeEnd) throws AnalysisEngineProcessException {
        JFSIndexRepository repository = jcas.getJFSIndexRepository();
        for (Object indexed : repository.getAnnotationIndex(BaseToken.type)) {
            BaseToken baseToken = (BaseToken)indexed;
            int tokenBegin = baseToken.getBegin();
            int tokenEnd = baseToken.getEnd();
            boolean insideRange = tokenBegin >= rangeBegin && tokenEnd <= rangeEnd;
            if (!insideRange) {
                continue;
            }
            String coveredText = text.substring(tokenBegin, tokenEnd);
            if (this.exclusionSet.contains(coveredText)) {
                continue;
            }
            this.setNormalizedForm(baseToken, coveredText);
            if (this.postLemmas) {
                this.setLemma(baseToken, coveredText, jcas);
            }
        }
    }

    /**
     * Sets the normalized form of a token.
     * <p>
     * The normalization cache is consulted first when it is enabled. On a miss the token
     * is sent to LVG; the second {@code |}-separated field of the result is used unless
     * it is {@code No Output}. The token is left unchanged when no form is found.
     *
     * @param tokenAnnotation the token to update
     * @param token the text of the token
     * @throws AnalysisEngineProcessException wrapping any exception raised by the LVG call
     */
    private void setNormalizedForm(BaseToken tokenAnnotation, String token) throws AnalysisEngineProcessException {
        String normalized = this.useCmdCache ? (String)this.normCacheMap.get(token) : null;
        if (normalized == null) {
            try {
                String[] fields = this.lvgCmd.MutateToString(token).split("\\|");
                boolean hasForm = fields.length >= 2 && !"No Output".equals(fields[1]);
                if (hasForm) {
                    normalized = fields[1];
                }
            }
            catch (Exception cause) {
                throw new AnalysisEngineProcessException((Throwable)cause);
            }
        }
        if (normalized == null) {
            return;
        }
        tokenAnnotation.setNormalizedForm(normalized);
    }

    /**
     * Attaches lemma entries to a token.
     * <p>
     * Lemmas and their part-of-speech tags come from the lemma cache when it is enabled
     * and contains the word. Otherwise they are computed with LVG, keeping only the
     * categories that have a mapping in the Xerox-to-Treebank table. One {@link Lemma}
     * is created per (lemma, tag) pair and the resulting list is stored on the token.
     *
     * @param wordAnnotation the token receiving the lemma entries
     * @param word the text of the token
     * @param jcas the CAS in which the lemma feature structures are created
     * @throws AnalysisEngineProcessException wrapping any exception raised by the LVG call
     */
    private void setLemma(BaseToken wordAnnotation, String word, JCas jcas) throws AnalysisEngineProcessException {
        Map<String, Set<String>> posTagsByLemma = null;

        if (this.useLemmaCache) {
            Set<?> cachedEntries = (Set<?>)this.lemmaCacheMap.get(word);
            if (cachedEntries != null) {
                posTagsByLemma = new HashMap<String, Set<String>>();
                for (Object entry : cachedEntries) {
                    LemmaLocalClass cachedLemma = (LemmaLocalClass)entry;
                    posTagsByLemma.put(cachedLemma.word, cachedLemma.posSet);
                }
            }
        }

        if (posTagsByLemma == null) {
            // Cache disabled or no cached entry for this word: ask LVG.
            posTagsByLemma = new HashMap<String, Set<String>>();
            try {
                for (Object item : this.lvgLexItem.MutateLexItem(word)) {
                    LexItem lexItem = (LexItem)item;
                    Category category = lexItem.GetTargetCategory();
                    String lemmaText = lexItem.GetTargetTerm();
                    for (long categoryBit : Category.ToValuesArray(category.GetValue())) {
                        String categoryName = Category.ToName(categoryBit);
                        String treebankTag = (String)this.xeroxTreebankMap.get(categoryName);
                        if (treebankTag != null) {
                            Set<String> tags = posTagsByLemma.get(lemmaText);
                            if (tags == null) {
                                tags = new HashSet<String>();
                                posTagsByLemma.put(lemmaText, tags);
                            }
                            tags.add(treebankTag);
                        }
                    }
                }
            }
            catch (Exception cause) {
                throw new AnalysisEngineProcessException((Throwable)cause);
            }
        }

        ArrayList<Lemma> lemmaEntries = new ArrayList<Lemma>(posTagsByLemma.size());
        for (Map.Entry<String, Set<String>> lemmaAndTags : posTagsByLemma.entrySet()) {
            for (String posTag : lemmaAndTags.getValue()) {
                Lemma lemma = new Lemma(jcas);
                lemma.setKey(lemmaAndTags.getKey());
                lemma.setPosTag(posTag);
                lemmaEntries.add(lemma);
            }
        }
        Lemma[] lemmaArray = lemmaEntries.toArray(new Lemma[lemmaEntries.size()]);
        FSList lemmaList = ListFactory.buildList(jcas, (TOP[])lemmaArray);
        wordAnnotation.setLemmaEntries(lemmaList);
    }

    /**
     * Loads the normalization cache from a classpath resource into {@code normCacheMap}.
     * <p>
     * Each valid line has exactly seven {@code |}-separated tokens; the first is a
     * frequency, the second the original word and the third its normalized form. A line
     * is kept only if its frequency is greater than {@code cmdCacheFreqCutoff}, and the
     * first mapping seen for a word wins. Lines with another token count are logged and
     * skipped.
     *
     * @param cpLocation classpath location of the cache file
     * @throws FileNotFoundException if the resource cannot be found
     * @throws IOException if reading or closing the resource fails
     */
    private void loadCmdCacheFile(String cpLocation) throws FileNotFoundException, IOException {
        InputStream inStream = this.getClass().getResourceAsStream(cpLocation);
        if (inStream == null) {
            throw new FileNotFoundException("Unable to find: " + cpLocation);
        }
        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
        try {
            this.normCacheMap = new HashMap<String, String>();
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                StringTokenizer st = new StringTokenizer(line, "|");
                if (st.countTokens() != 7) {
                    this.logger.warn("Invalid LVG norm cache line: " + line);
                    continue;
                }
                int freq = Integer.parseInt(st.nextToken());
                if (freq <= this.cmdCacheFreqCutoff) {
                    this.logger.debug("Discarding norm cache line due to frequency cutoff: " + line);
                    continue;
                }
                String origWord = st.nextToken();
                String normWord = st.nextToken();
                if (!this.normCacheMap.containsKey(origWord)) {
                    this.normCacheMap.put(origWord, normWord);
                }
            }
        }
        finally {
            // Closing the reader also releases the underlying classpath stream,
            // which was previously left open.
            br.close();
        }
    }

    /**
     * Loads the lemma cache from a classpath resource into {@code lemmaCacheMap}.
     * <p>
     * Each valid line has exactly four {@code |}-separated tokens: a frequency, the
     * original word, a lemma, and a category string whose first and last characters are
     * stripped before it is decoded with {@code Category.ToValue}. A line is kept only
     * if its frequency is greater than {@code lemmaCacheFreqCutoff}. Categories with no
     * entry in the Xerox-to-Treebank table are dropped. Lines with another token count
     * are logged and skipped.
     *
     * @param cpLocation classpath location of the cache file
     * @throws FileNotFoundException if the resource cannot be found
     * @throws IOException if reading or closing the resource fails
     */
    private void loadLemmaCacheFile(String cpLocation) throws FileNotFoundException, IOException {
        InputStream inStream = this.getClass().getResourceAsStream(cpLocation);
        if (inStream == null) {
            throw new FileNotFoundException("Unable to find: " + cpLocation);
        }
        BufferedReader br = new BufferedReader(new InputStreamReader(inStream));
        try {
            this.lemmaCacheMap = new HashMap();
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                StringTokenizer st = new StringTokenizer(line, "|");
                if (st.countTokens() != 4) {
                    this.logger.warn("Invalid LVG lemma cache line: " + line);
                    continue;
                }
                int freq = Integer.parseInt(st.nextToken());
                if (freq <= this.lemmaCacheFreqCutoff) {
                    this.logger.debug("Discarding lemma cache line due to frequency cutoff: " + line);
                    continue;
                }
                String origWord = st.nextToken();
                String lemmaWord = st.nextToken();
                String combinedCategories = st.nextToken();
                // Drop the enclosing first and last character of the category field.
                combinedCategories = combinedCategories.substring(1, combinedCategories.length() - 1);

                LemmaLocalClass l = new LemmaLocalClass();
                l.word = lemmaWord;
                l.posSet = new HashSet();
                long bitVector = Category.ToValue(combinedCategories);
                long[] bitValues = Category.ToValuesArray(bitVector);
                for (int i = 0; i < bitValues.length; ++i) {
                    String pos = Category.ToName(bitValues[i]);
                    String treebankTag = (String)this.xeroxTreebankMap.get(pos);
                    if (treebankTag == null) {
                        continue;
                    }
                    l.posSet.add(treebankTag);
                }

                Set<LemmaLocalClass> lemmaSet = (Set)this.lemmaCacheMap.get(origWord);
                if (lemmaSet == null) {
                    lemmaSet = new HashSet<LemmaLocalClass>();
                    this.lemmaCacheMap.put(origWord, lemmaSet);
                }
                lemmaSet.add(l);
            }
        }
        finally {
            // Closing the reader also releases the underlying classpath stream,
            // which was previously left open.
            br.close();
        }
    }

    /**
     * One cached lemma of a word: the lemma text together with the part-of-speech
     * tags it was recorded with. Instances are compared by identity.
     */
    class LemmaLocalClass {
        /** The lemma text. */
        public String word;
        /** Part-of-speech tags associated with {@link #word}. */
        public Set<String> posSet;
    }
}

