/*
 * Decompiled with CFR 0.152.
 */
package org.apache.ctakes.lvg.ae;

import gov.nih.nlm.nls.lvg.Api.LvgCmdApi;
import gov.nih.nlm.nls.lvg.Api.LvgLexItemApi;
import gov.nih.nlm.nls.lvg.Lib.Category;
import gov.nih.nlm.nls.lvg.Lib.LexItem;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.Vector;
import org.apache.commons.io.FileUtils;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.ListFactory;
import org.apache.ctakes.lvg.resource.LvgCmdApiResource;
import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
import org.apache.ctakes.typesystem.type.syntax.Lemma;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.cas.FSList;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.resource.ResourceInitializationException;

@PipeBitInfo(name="LVG Annotator", description="Adds cononical form of words.", dependencies={PipeBitInfo.TypeProduct.SECTION, PipeBitInfo.TypeProduct.BASE_TOKEN})
public class LvgAnnotator
extends JCasAnnotator_ImplBase {
    /** Initial value of {@link #wordsToExclude}: words that are never normalized. */
    public static final String[] defaultExclusionWords = new String[]{"And", "and", "By", "by", "For", "for", "In", "in", "Of", "of", "On", "on", "The", "the", "To", "to", "With", "with"};
    /** Initial value of {@link #xtMaps}: "sourceTag|treebankTag" pairs. */
    public static final String[] defaultTreebankMap = new String[]{"adj|JJ", "adv|RB", "aux|AUX", "compl|CS", "conj|CC", "det|DET", "modal|MD", "noun|NN", "prep|IN", "pron|PRP", "verb|VB"};
    public static final String PARAM_POST_LEMMAS = "PostLemmas";
    @ConfigurationParameter(name="PostLemmas", mandatory=false, defaultValue={"false"}, description="Whether to extract the lexical variants and write to cas (creates large files)")
    private boolean postLemmas;
    public static final String PARAM_USE_LEMMA_CACHE = "UseLemmaCache";
    @ConfigurationParameter(name="UseLemmaCache", mandatory=false, defaultValue={"false"}, description="Whether to use a cache for lemmas")
    private boolean useLemmaCache;
    public static final String PARAM_LEMMA_CACHE_FILE_LOCATION = "LemmaCacheFileLocation";
    @ConfigurationParameter(name="LemmaCacheFileLocation", mandatory=false, defaultValue={"/org/apache/ctakes/lvg/2005_lemma.voc"}, description="Path to lemma cache file -- if useLemmaCache and postLemmas are true")
    private String lemmaCacheFileLocation = null;
    public static final String PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF = "LemmaCacheFrequencyCutoff";
    // The lemma cutoff parameter must feed the field read by loadLemmaCacheFile();
    // it was previously bound to cmdCacheFreqCutoff, so the two cutoffs were crossed.
    @ConfigurationParameter(name="LemmaCacheFrequencyCutoff", mandatory=false, description="Threshold for the frequency of a lemma to be loaded into the cache", defaultValue={"20"})
    private int lemmaCacheFreqCutoff;
    public static final String PARAM_USE_SEGMENTS = "UseSegments";
    @ConfigurationParameter(name="UseSegments", mandatory=false, defaultValue={"false"}, description="Whether to use segments found in upstream cTAKES components")
    private boolean useSegments;
    public static final String PARAM_SKIP_SEGMENTS = "SegmentsToSkip";
    @ConfigurationParameter(name="SegmentsToSkip", mandatory=false, defaultValue={}, description="Segment IDs to skip during processing")
    private String[] skipSegmentIDs;
    /** Set view of {@link #skipSegmentIDs}, built by configInit(). */
    private Set<String> skipSegmentsSet;
    public static final String PARAM_XT_MAP = "XeroxTreebankMap";
    @ConfigurationParameter(name="XeroxTreebankMap", mandatory=false, description="Mapping from Xerox parts of speech to Treebank equivalents")
    private String[] xtMaps = defaultTreebankMap;
    /** Parsed form of {@link #xtMaps} (source tag to Treebank tag), built by configInit(). */
    private Map<String, String> xeroxTreebankMap;
    public static final String PARAM_USE_CMD_CACHE = "UseCmdCache";
    @ConfigurationParameter(name="UseCmdCache", mandatory=false, defaultValue={"false"}, description="Use cache to track canonical forms")
    private boolean useCmdCache;
    public static final String PARAM_CMD_CACHE_FILE = "CmdCacheFileLocation";
    @ConfigurationParameter(name="CmdCacheFileLocation", mandatory=false, defaultValue={"/org/apache/ctakes/lvg/2005_norm.voc"}, description="File with stored cache of canonical forms")
    private String cmdCacheFileLocation;
    // NOTE: despite the constant's name, its value is the *cmd* (norm) cache cutoff
    // parameter. The constant name is public API and is therefore left unchanged.
    public static final String PARAM_LEMMA_FREQ_CUTOFF = "CmdCacheFrequencyCutoff";
    // The cmd cutoff parameter must feed the field read by loadCmdCacheFile();
    // it was previously bound to lemmaCacheFreqCutoff.
    @ConfigurationParameter(name="CmdCacheFrequencyCutoff", mandatory=false, description="Minimum frequency required for loading from cache", defaultValue={"20"})
    private int cmdCacheFreqCutoff;
    public static final String PARAM_EXCLUSION_WORDS = "ExclusionSet";
    @ConfigurationParameter(name="ExclusionSet", mandatory=false, description="Words to exclude when doing LVG normalization")
    private String[] wordsToExclude = defaultExclusionWords;
    /** Set view of {@link #wordsToExclude}, built by configInit(). */
    private Set<String> exclusionSet;
    private Logger logger = Logger.getLogger(this.getClass().getName());
    public static final String PARAM_LVGCMDAPI_RESRC_KEY = "LvgCmdApi";
    @ExternalResource(key="LvgCmdApi", mandatory=true)
    private LvgCmdApiResource lvgResource;
    /** Obtained from {@link #lvgResource} in initialize(); used to compute canonical forms. */
    private LvgCmdApi lvgCmd;
    /** Obtained from {@link #lvgResource} in initialize() only when postLemmas is set. */
    private LvgLexItemApi lvgLexItem;
    /** Word to canonical form; populated by loadCmdCacheFile() when useCmdCache is set. */
    private Map<String, String> normCacheMap;
    /** Word to cached lemma entries; populated by loadLemmaCacheFile(). */
    private Map<String, Set<LemmaLocalClass>> lemmaCacheMap;

    /**
     * Prepares the annotator: builds the configuration lookup sets, fetches the
     * LVG command API from the external resource and, depending on the
     * configuration flags, loads the canonical-form cache and the lemma cache.
     *
     * @param aContext the UIMA context handed to the superclass initializer
     * @throws ResourceInitializationException if the superclass fails or a cache
     *         file cannot be read (the {@link IOException} is kept as the cause)
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        configInit();
        try {
            lvgCmd = lvgResource.getLvg();
            if (useCmdCache) {
                logger.info("Loading Cmd cache=" + cmdCacheFileLocation);
                loadCmdCacheFile(cmdCacheFileLocation);
                logger.info("Loaded " + normCacheMap.size() + " entries");
            }

            // The lexical-item API and the lemma cache are only needed when lemmas are posted.
            if (!postLemmas) {
                return;
            }
            lvgLexItem = lvgResource.getLvgLex();
            if (!useLemmaCache) {
                return;
            }
            logger.info("Loading Lemma cache=" + lemmaCacheFileLocation);
            loadLemmaCacheFile(lemmaCacheFileLocation);
            logger.info("Loaded " + lemmaCacheMap.size() + " entries");
        } catch (IOException ioFailure) {
            throw new ResourceInitializationException(ioFailure);
        }
    }

    /**
     * Converts the array-valued configuration parameters into lookup structures:
     * the set of segment IDs to skip, the source-tag to Treebank-tag map and the
     * set of words excluded from normalization.
     *
     * <p>A parameter array that was never injected (null) is treated as empty so
     * that initialization does not fail with a NullPointerException.
     */
    private void configInit() {
        this.skipSegmentsSet = new HashSet<String>();
        if (this.skipSegmentIDs != null) {
            for (String segmentId : this.skipSegmentIDs) {
                this.skipSegmentsSet.add(segmentId);
            }
        }

        this.xeroxTreebankMap = new HashMap<String, String>();
        if (this.xtMaps != null) {
            for (String mapping : this.xtMaps) {
                if (mapping == null) {
                    continue;
                }
                // Each entry must be exactly "sourceTag|treebankTag".
                StringTokenizer tokenizer = new StringTokenizer(mapping, "|");
                if (tokenizer.countTokens() != 2) {
                    // Still skipped as before, but no longer silently.
                    this.logger.warn("Ignoring malformed XeroxTreebankMap entry: " + mapping);
                    continue;
                }
                String xTag = tokenizer.nextToken();
                String tTag = tokenizer.nextToken();
                this.xeroxTreebankMap.put(xTag, tTag);
            }
        }

        this.exclusionSet = new HashSet<String>();
        if (this.wordsToExclude != null) {
            for (String excluded : this.wordsToExclude) {
                this.exclusionSet.add(excluded);
            }
        }
    }

    /**
     * Annotates the word tokens of one document. When segments are in use, each
     * {@link Segment} whose ID is not in the skip set is processed separately;
     * otherwise the whole document text is processed as a single range.
     *
     * @param jcas the CAS holding the document text and its annotations
     * @throws AnalysisEngineProcessException if annotation fails; failures raised
     *         by {@code annotateRange} are propagated as-is rather than re-wrapped
     */
    @Override
    public void process(JCas jcas) throws AnalysisEngineProcessException {
        this.logger.info("process(JCas)");
        String text = jcas.getDocumentText();
        if (text == null) {
            // No document text means there is nothing to take substrings from.
            return;
        }
        try {
            if (!this.useSegments) {
                this.annotateRange(jcas, text, 0, text.length());
                return;
            }
            JFSIndexRepository indexes = jcas.getJFSIndexRepository();
            // The index is not typed to Segment, so iterate generically and cast.
            for (Object indexed : indexes.getAnnotationIndex(Segment.type)) {
                Segment segmentAnnotation = (Segment) indexed;
                if (this.skipSegmentsSet.contains(segmentAnnotation.getId())) {
                    continue;
                }
                this.annotateRange(jcas, text, segmentAnnotation.getBegin(), segmentAnnotation.getEnd());
            }
        } catch (AnalysisEngineProcessException e) {
            // Already the right type: avoid nesting it inside a second wrapper.
            throw e;
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Sets the canonical form (and, when {@code postLemmas} is enabled, the lemma
     * entries) of every {@link WordToken} lying entirely inside the given range.
     * A non-empty suggestion on the token replaces the covered text as the word
     * to normalize; words in the exclusion set are left untouched.
     *
     * @param jcas       the CAS whose word tokens are annotated
     * @param text       the document text the token offsets refer to
     * @param rangeBegin inclusive start offset of the range
     * @param rangeEnd   end offset of the range; tokens ending after it are skipped
     * @throws AnalysisEngineProcessException if normalization or lemma lookup fails
     */
    protected void annotateRange(JCas jcas, String text, int rangeBegin, int rangeEnd) throws AnalysisEngineProcessException {
        JFSIndexRepository indexes = jcas.getJFSIndexRepository();
        // The index is not typed to WordToken, so iterate generically and cast.
        for (Object indexed : indexes.getAnnotationIndex(WordToken.type)) {
            WordToken wordAnnotation = (WordToken) indexed;
            int tokenBegin = wordAnnotation.getBegin();
            int tokenEnd = wordAnnotation.getEnd();
            if (tokenBegin < rangeBegin || tokenEnd > rangeEnd) {
                continue;
            }
            String word = text.substring(tokenBegin, tokenEnd);
            String suggestion = wordAnnotation.getSuggestion();
            if (suggestion != null && suggestion.length() > 0) {
                word = suggestion;
            }
            if (this.exclusionSet.contains(word)) {
                continue;
            }
            this.setCanonicalForm(wordAnnotation, word);
            if (this.postLemmas) {
                this.setLemma(wordAnnotation, word, jcas);
            }
        }
    }

    /**
     * Stores the canonical form of {@code word} on the token. The norm cache is
     * consulted first when enabled; on a miss the LVG command API is invoked and
     * the second '|'-separated field of its output is used, unless that field is
     * the literal "No Output". The token is left unchanged when no form is found.
     *
     * @param wordAnnotation the token to update
     * @param word           the text to normalize
     * @throws AnalysisEngineProcessException if the LVG call fails
     */
    private void setCanonicalForm(WordToken wordAnnotation, String word) throws AnalysisEngineProcessException {
        String normalized = this.useCmdCache ? this.normCacheMap.get(word) : null;

        if (normalized == null) {
            try {
                String[] fields = this.lvgCmd.MutateToString(word).split("\\|");
                boolean hasResult = fields.length >= 2 && !"No Output".equals(fields[1]);
                if (hasResult) {
                    normalized = fields[1];
                }
            } catch (Exception lvgFailure) {
                throw new AnalysisEngineProcessException(lvgFailure);
            }
        }

        if (normalized == null) {
            return;
        }
        wordAnnotation.setCanonicalForm(normalized);
    }

    /**
     * Attaches lemma entries to the token: one {@link Lemma} per (lemma form,
     * Treebank POS tag) pair. Pairs come from the lemma cache when it is enabled
     * and contains the word; otherwise they are computed through the LVG
     * lexical-item API, keeping only categories that have a Treebank mapping.
     *
     * <p>Cached entries sharing the same lemma form have their tag sets merged
     * into a fresh set, so no tags are lost and the cached sets are never aliased.
     *
     * @param wordAnnotation the token receiving the lemma list
     * @param word           the text to look up
     * @param jcas           the CAS in which the Lemma feature structures are created
     * @throws AnalysisEngineProcessException if the LVG call fails
     */
    private void setLemma(WordToken wordAnnotation, String word, JCas jcas) throws AnalysisEngineProcessException {
        Map<String, Set<String>> lemmaMap = null;

        if (this.useLemmaCache) {
            Set<LemmaLocalClass> cachedLemmas = this.lemmaCacheMap.get(word);
            if (cachedLemmas != null) {
                lemmaMap = new HashMap<String, Set<String>>();
                for (LemmaLocalClass cached : cachedLemmas) {
                    posTagsFor(lemmaMap, cached.word).addAll(cached.posSet);
                }
            }
        }

        if (lemmaMap == null) {
            lemmaMap = new HashMap<String, Set<String>>();
            try {
                Vector<LexItem> lexItems = this.lvgLexItem.MutateLexItem(word);
                for (LexItem li : lexItems) {
                    Category c = li.GetTargetCategory();
                    String lemmaStr = li.GetTargetTerm();
                    // A category value is expanded into its individual category values.
                    long[] bitValues = Category.ToValuesArray((long) c.GetValue());
                    for (long bitValue : bitValues) {
                        String lemmaPos = Category.ToName(bitValue);
                        String treebankTag = this.xeroxTreebankMap.get(lemmaPos);
                        if (treebankTag == null) {
                            continue;
                        }
                        posTagsFor(lemmaMap, lemmaStr).add(treebankTag);
                    }
                }
            } catch (Exception e) {
                throw new AnalysisEngineProcessException(e);
            }
        }

        ArrayList<Lemma> lemmas = new ArrayList<Lemma>(lemmaMap.size());
        for (Map.Entry<String, Set<String>> lemmaEntry : lemmaMap.entrySet()) {
            for (String pos : lemmaEntry.getValue()) {
                Lemma lemma = new Lemma(jcas);
                lemma.setKey(lemmaEntry.getKey());
                lemma.setPosTag(pos);
                lemmas.add(lemma);
            }
        }
        Lemma[] lemmaArray = lemmas.toArray(new Lemma[lemmas.size()]);
        FSList fsList = ListFactory.buildList(jcas, lemmaArray);
        wordAnnotation.setLemmaEntries(fsList);
    }

    /** Returns the tag set registered for {@code lemma}, creating and registering an empty one if absent. */
    private static Set<String> posTagsFor(Map<String, Set<String>> lemmaMap, String lemma) {
        Set<String> tags = lemmaMap.get(lemma);
        if (tags == null) {
            tags = new HashSet<String>();
            lemmaMap.put(lemma, tags);
        }
        return tags;
    }

    /**
     * Loads the canonical-form (norm) cache from a classpath resource into
     * {@code normCacheMap}.
     *
     * <p>Each usable line has exactly seven non-empty '|'-separated tokens; the
     * first is an integer frequency, the second the original word and the third
     * its normalized form (the remaining tokens are ignored). Lines whose
     * frequency is not strictly greater than {@code cmdCacheFreqCutoff} are
     * discarded, and the first mapping seen for a word wins.
     *
     * @param cpLocation resource path resolved via {@code getClass().getResourceAsStream}
     * @throws FileNotFoundException if the resource does not exist
     * @throws IOException           if reading the resource fails
     */
    private void loadCmdCacheFile(String cpLocation) throws FileNotFoundException, IOException {
        InputStream inStream = this.getClass().getResourceAsStream(cpLocation);
        if (inStream == null) {
            // Previously surfaced as an unexplained NullPointerException.
            throw new FileNotFoundException("LVG norm cache not found on classpath: " + cpLocation);
        }
        // Closing the reader also closes the underlying resource stream.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(inStream))) {
            this.normCacheMap = new HashMap<String, String>();
            String line;
            while ((line = br.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, "|");
                if (st.countTokens() != 7) {
                    this.logger.warn("Invalid LVG norm cache line: " + line);
                    continue;
                }
                int freq;
                try {
                    freq = Integer.parseInt(st.nextToken());
                } catch (NumberFormatException badFrequency) {
                    // A non-numeric frequency is just another malformed line; skip it.
                    this.logger.warn("Invalid LVG norm cache line: " + line);
                    continue;
                }
                if (freq <= this.cmdCacheFreqCutoff) {
                    this.logger.debug("Discarding norm cache line due to frequency cutoff: " + line);
                    continue;
                }
                String origWord = st.nextToken();
                String normWord = st.nextToken();
                if (!this.normCacheMap.containsKey(origWord)) {
                    this.normCacheMap.put(origWord, normWord);
                }
            }
        }
    }

    /**
     * Loads the lemma cache from a classpath resource into {@code lemmaCacheMap}.
     *
     * <p>Each usable line has exactly four non-empty '|'-separated tokens:
     * an integer frequency, the original word, the lemma and a combined category
     * token. Lines whose frequency is not strictly greater than
     * {@code lemmaCacheFreqCutoff} are discarded. Categories without a Treebank
     * mapping contribute no tag.
     *
     * @param cpLocation resource path resolved via {@code getClass().getResourceAsStream}
     * @throws FileNotFoundException if the resource does not exist
     * @throws IOException           if reading the resource fails
     */
    private void loadLemmaCacheFile(String cpLocation) throws FileNotFoundException, IOException {
        InputStream inStream = this.getClass().getResourceAsStream(cpLocation);
        if (inStream == null) {
            // Previously surfaced as an unexplained NullPointerException.
            throw new FileNotFoundException("LVG lemma cache not found on classpath: " + cpLocation);
        }
        // Closing the reader also closes the underlying resource stream.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(inStream))) {
            this.lemmaCacheMap = new HashMap<String, Set<LemmaLocalClass>>();
            String line;
            while ((line = br.readLine()) != null) {
                StringTokenizer st = new StringTokenizer(line, "|");
                if (st.countTokens() != 4) {
                    this.logger.warn("Invalid LVG lemma cache line: " + line);
                    continue;
                }
                int freq;
                try {
                    freq = Integer.parseInt(st.nextToken());
                } catch (NumberFormatException badFrequency) {
                    // A non-numeric frequency is just another malformed line; skip it.
                    this.logger.warn("Invalid LVG lemma cache line: " + line);
                    continue;
                }
                if (freq <= this.lemmaCacheFreqCutoff) {
                    this.logger.debug("Discarding lemma cache line due to frequency cutoff: " + line);
                    continue;
                }
                String origWord = st.nextToken();
                String lemmaWord = st.nextToken();
                String combinedCategories = st.nextToken();
                if (combinedCategories.length() < 2) {
                    // Too short to strip its first and last character below.
                    this.logger.warn("Invalid LVG lemma cache line: " + line);
                    continue;
                }
                // Drops the first and last character of the token
                // (presumably enclosing delimiters -- confirm against the cache file format).
                combinedCategories = combinedCategories.substring(1, combinedCategories.length() - 1);

                LemmaLocalClass l = new LemmaLocalClass();
                l.word = lemmaWord;
                l.posSet = new HashSet<String>();
                long bitVector = Category.ToValue(combinedCategories);
                for (long bitValue : Category.ToValuesArray(bitVector)) {
                    String pos = Category.ToName(bitValue);
                    String treebankTag = this.xeroxTreebankMap.get(pos);
                    if (treebankTag != null) {
                        l.posSet.add(treebankTag);
                    }
                }

                Set<LemmaLocalClass> lemmaSet = this.lemmaCacheMap.get(origWord);
                if (lemmaSet == null) {
                    lemmaSet = new HashSet<LemmaLocalClass>();
                    this.lemmaCacheMap.put(origWord, lemmaSet);
                }
                lemmaSet.add(l);
            }
        }
    }

    /**
     * Copies the LVG configuration, database, rule and Unicode data files from the
     * classpath (under {@code org/apache/ctakes/lvg/}) into {@code absolutePath},
     * preserving their relative {@code data/...} layout.
     *
     * @param absolutePath target directory the {@code data/...} tree is created under
     * @return the path of the copied {@code data/config/lvg.properties} inside
     *         {@code absolutePath} (previously a fixed {@code /tmp/...} path was
     *         returned regardless of the argument)
     * @throws IllegalStateException if one of the resources is missing from the classpath
     * @throws RuntimeException      if copying a resource fails; the
     *                               {@link IOException} is kept as the cause
     */
    public static String copyLvgFiles(String absolutePath) {
        final String prefix = "org/apache/ctakes/lvg/";
        final String propertiesPath = "data/config/lvg.properties";
        String[] filesToCopy = new String[]{propertiesPath, "data/HSqlDb/lvg2008.backup", "data/HSqlDb/lvg2008.data", "data/HSqlDb/lvg2008.properties", "data/HSqlDb/lvg2008.script", "data/misc/conjunctionWord.data", "data/misc/nonInfoWords.data", "data/misc/removeS.data", "data/misc/stopWords.data", "data/misc/symbolSynonyms.data", "data/rules/dm.rul", "data/rules/im.rul", "data/rules/plural.rul", "data/rules/verbinfl.rul", "data/rules/exceptionD.data", "data/rules/exceptionI.data", "data/rules/ruleD.data", "data/rules/ruleI.data", "data/rules/trieD.data", "data/rules/trieI.data", "data/Unicode/diacriticMap.data", "data/Unicode/ligatureMap.data", "data/Unicode/nonStripMap.data", "data/Unicode/synonymMap.data", "data/Unicode/symbolMap.data", "data/Unicode/unicodeMap.data"};
        // Looked up once rather than on every loop iteration.
        Logger logger = Logger.getLogger(LvgAnnotator.class.getName());
        for (String path : filesToCopy) {
            InputStream stream = LvgAnnotator.class.getClassLoader().getResourceAsStream(prefix + path);
            if (stream == null) {
                // Fail with a clear message instead of a NullPointerException during the copy.
                throw new IllegalStateException("Unable to find classpath resource " + prefix + path);
            }
            File file = new File(absolutePath, path);
            logger.info("Copying lvg-related file to " + file.getAbsolutePath());
            try {
                FileUtils.copyInputStreamToFile(stream, file);
            }
            catch (IOException e) {
                throw new RuntimeException("Error copying temporary InpuStream " + stream.toString() + " to " + file.getAbsolutePath() + ".", e);
            }
        }
        // Derived from the argument so the result always points at the file just copied.
        return new File(absolutePath, propertiesPath).getPath();
    }

    /**
     * Builds an engine description for this annotator with all caches, segment
     * handling and lemma posting switched off and both frequency cutoffs at 20.
     * The LVG properties file is taken from the classpath when present; otherwise
     * the LVG data files are first copied under {@code /tmp/} and the copied
     * properties file is used.
     *
     * @return the analysis engine description bound to the LVG external resource
     * @throws ResourceInitializationException if the description cannot be created
     * @throws MalformedURLException           if the copied properties file cannot
     *                                         be expressed as a URL
     */
    public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException, MalformedURLException {
        final String propertiesResource = "org/apache/ctakes/lvg/data/config/lvg.properties";
        final Logger log = Logger.getLogger(LvgAnnotator.class.getName());

        URL propertiesUrl = LvgAnnotator.class.getClassLoader().getResource(propertiesResource);
        if (propertiesUrl == null) {
            final String targetDir = "/tmp/";
            log.info("URL==null");
            log.info("Unable to find " + propertiesResource + ".");
            log.info("Copying files and directories to under " + targetDir);
            String copiedProperties = LvgAnnotator.copyLvgFiles(targetDir);
            propertiesUrl = new File(copiedProperties).toURI().toURL();
        } else {
            log.info("URL for lvg.properties =" + propertiesUrl.getFile());
        }

        // Name/value pairs, followed by the external resource binding.
        Object[] configuration = new Object[]{
            PARAM_USE_CMD_CACHE, false,
            PARAM_USE_LEMMA_CACHE, false,
            PARAM_USE_SEGMENTS, false,
            PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF, 20,
            PARAM_LEMMA_FREQ_CUTOFF, 20,
            PARAM_POST_LEMMAS, false,
            PARAM_LVGCMDAPI_RESRC_KEY, ExternalResourceFactory.createExternalResourceDescription(LvgCmdApiResourceImpl.class, propertiesUrl, new Object[0])
        };
        return AnalysisEngineFactory.createEngineDescription(LvgAnnotator.class, configuration);
    }

    /**
     * Plain holder pairing one lemma with the Treebank part-of-speech tags
     * recorded for it; used as the value element of the lemma cache.
     */
    class LemmaLocalClass {
        /** The lemma text. */
        public String word;

        /** Treebank POS tags associated with {@link #word}. */
        public Set<String> posSet;
    }
}

