/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.scoring.similarity.util;

import java.io.Reader;
import java.io.StringReader;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.nutch.scoring.similarity.util.LuceneAnalyzerUtil;

/**
 * Builds a Lucene {@link TokenStream} over a piece of text.
 *
 * <p>Two kinds of analysis chain are produced, depending on the constructor used:
 * <ul>
 *   <li>Word chain: tokenizer selected by {@link TokenizerType}, then
 *       {@link LowerCaseFilter}, then a {@link StopFilter} if a stop set was configured,
 *       then the stemmer selected by {@code StemFilterType}.</li>
 *   <li>N-gram chain: {@link StandardTokenizer}, then {@link LowerCaseFilter}, then the
 *       selected stemmer, then a {@link ShingleFilter} with unigram output disabled.</li>
 * </ul>
 *
 * <p>The stream is built once in the constructor. This class never resets, consumes or
 * closes it; that is left to the caller of {@link #getTokenStream()}.
 */
public class LuceneTokenizer {
    private final TokenStream tokenStream;
    private final TokenizerType tokenizer;
    private final LuceneAnalyzerUtil.StemFilterType stemFilterType;
    /** Stop words to remove, or {@code null} when no stop filtering is applied. */
    private final CharArraySet stopSet;

    /**
     * Creates a word-level token stream, optionally removing the default English stop words.
     *
     * @param content        text to tokenize
     * @param tokenizer      tokenizer to use; {@code null} is treated like {@link TokenizerType#STANDARD}
     * @param useStopFilter  when {@code true}, filter with {@code EnglishAnalyzer.ENGLISH_STOP_WORDS_SET}
     * @param stemFilterType stemmer to apply; {@code null} or an unhandled value applies no stemming
     */
    public LuceneTokenizer(String content, TokenizerType tokenizer, boolean useStopFilter, LuceneAnalyzerUtil.StemFilterType stemFilterType) {
        this.tokenizer = tokenizer;
        this.stemFilterType = stemFilterType;
        this.stopSet = useStopFilter ? EnglishAnalyzer.ENGLISH_STOP_WORDS_SET : null;
        this.tokenStream = createTokenStream(content, tokenizer, this.stopSet, stemFilterType);
    }

    /**
     * Creates a word-level token stream that is always stop-filtered with a caller-supplied list.
     *
     * @param content        text to tokenize
     * @param tokenizer      tokenizer to use; {@code null} is treated like {@link TokenizerType#STANDARD}
     * @param stopWords      stop words supplied by the caller; {@code null} is treated as an empty list
     * @param addToDefault   when {@code true}, the words are added to a copy of the default English
     *                       stop set; when {@code false}, they replace it (the replacement set is
     *                       created with its ignore-case flag set)
     * @param stemFilterType stemmer to apply; {@code null} or an unhandled value applies no stemming
     */
    public LuceneTokenizer(String content, TokenizerType tokenizer, List<String> stopWords, boolean addToDefault, LuceneAnalyzerUtil.StemFilterType stemFilterType) {
        this.tokenizer = tokenizer;
        this.stemFilterType = stemFilterType;
        // A missing list means "no extra words" rather than a NullPointerException.
        List<String> extraWords = stopWords != null ? stopWords : java.util.Collections.<String>emptyList();
        if (addToDefault) {
            // Copy first: the shared default set must not be modified.
            CharArraySet combined = CharArraySet.copy(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
            for (String word : extraWords) {
                combined.add(word);
            }
            this.stopSet = combined;
        } else {
            this.stopSet = new CharArraySet(extraWords, true);
        }
        this.tokenStream = createTokenStream(content, tokenizer, this.stopSet, stemFilterType);
    }

    /**
     * Creates a shingle (word n-gram) token stream without stop filtering.
     *
     * <p>The {@code tokenizer} argument is stored but does not influence the stream: this chain
     * always tokenizes with {@link StandardTokenizer}.
     *
     * @param content        text to tokenize
     * @param tokenizer      recorded only; not used to build the n-gram chain
     * @param stemFilterType stemmer to apply before shingling; {@code null} or an unhandled value
     *                       applies no stemming
     * @param mingram        minimum shingle size passed to {@link ShingleFilter}
     * @param maxgram        maximum shingle size passed to {@link ShingleFilter}
     */
    public LuceneTokenizer(String content, TokenizerType tokenizer, LuceneAnalyzerUtil.StemFilterType stemFilterType, int mingram, int maxgram) {
        this.tokenizer = tokenizer;
        this.stemFilterType = stemFilterType;
        this.stopSet = null;
        this.tokenStream = createNGramTokenStream(content, stemFilterType, mingram, maxgram);
    }

    /**
     * Returns the token stream built by the constructor.
     *
     * @return the same stream instance on every call
     */
    public TokenStream getTokenStream() {
        return this.tokenStream;
    }

    /** Assembles the word chain: tokenizer, lower-casing, optional stop filter, optional stemmer. */
    private static TokenStream createTokenStream(String content, TokenizerType tokenizerType, CharArraySet stopWords, LuceneAnalyzerUtil.StemFilterType stemFilterType) {
        TokenStream stream = new LowerCaseFilter(generateTokenStreamFromText(content, tokenizerType));
        if (stopWords != null) {
            stream = new StopFilter(stream, stopWords);
        }
        return applyStemmer(stream, stemFilterType);
    }

    /**
     * Creates the tokenizer for {@code tokenizerType} and attaches {@code content} as its input.
     * Every value other than {@code CLASSIC}, including {@code null}, yields a StandardTokenizer.
     */
    private static TokenStream generateTokenStreamFromText(String content, TokenizerType tokenizerType) {
        StringReader reader = new StringReader(content);
        // Each branch keeps its own concrete type: the two tokenizer classes are unrelated,
        // so a single local of either type cannot hold both.
        if (tokenizerType == TokenizerType.CLASSIC) {
            ClassicTokenizer classic = new ClassicTokenizer();
            classic.setReader(reader);
            return classic;
        }
        StandardTokenizer standard = new StandardTokenizer();
        standard.setReader(reader);
        return standard;
    }

    /** Assembles the n-gram chain: StandardTokenizer, lower-casing, optional stemmer, shingles only. */
    private static TokenStream createNGramTokenStream(String content, LuceneAnalyzerUtil.StemFilterType stemFilterType, int mingram, int maxgram) {
        StandardTokenizer source = new StandardTokenizer();
        source.setReader(new StringReader(content));
        TokenStream stemmed = applyStemmer(new LowerCaseFilter(source), stemFilterType);
        ShingleFilter shingles = new ShingleFilter(stemmed, mingram, maxgram);
        // Emit only the n-grams, not the individual tokens they are built from.
        shingles.setOutputUnigrams(false);
        return shingles;
    }

    /**
     * Wraps {@code input} in the stemmer selected by {@code stemFilterType}.
     * Returns {@code input} unchanged for {@code null} or any value without a matching stemmer.
     */
    private static TokenStream applyStemmer(TokenStream input, LuceneAnalyzerUtil.StemFilterType stemFilterType) {
        if (stemFilterType == null) {
            return input;
        }
        switch (stemFilterType) {
            case ENGLISHMINIMALSTEM_FILTER: {
                return new EnglishMinimalStemFilter(input);
            }
            case PORTERSTEM_FILTER: {
                return new PorterStemFilter(input);
            }
            default: {
                return input;
            }
        }
    }

    /** Tokenizer implementations selectable for the word chain. */
    public enum TokenizerType {
        CLASSIC,
        STANDARD
    }
}

