/*
 * Decompiled with CFR 0.152.
 */
package jcolibri.extensions.textual.IE.opennlp;

import java.util.Collection;
import java.util.List;
import jcolibri.cbrcore.Attribute;
import jcolibri.cbrcore.CBRCase;
import jcolibri.cbrcore.CBRQuery;
import jcolibri.extensions.textual.IE.IEutils;
import jcolibri.extensions.textual.IE.gate.GateSplitter;
import jcolibri.extensions.textual.IE.opennlp.IETextOpenNLP;
import jcolibri.extensions.textual.IE.representation.IEText;
import jcolibri.extensions.textual.IE.representation.Paragraph;
import jcolibri.extensions.textual.IE.representation.Sentence;
import jcolibri.extensions.textual.IE.representation.Token;
import jcolibri.util.AttributeUtils;
import jcolibri.util.ProgressController;
import opennlp.common.xml.NLPDocument;
import opennlp.grok.preprocess.sentdetect.EnglishSentenceDetectorME;
import opennlp.grok.preprocess.tokenize.EnglishTokenizerME;
import opennlp.grok.preprocess.tokenize.TokenizerME;
import org.apache.commons.logging.LogFactory;
import org.jdom.Element;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Splits {@link IETextOpenNLP} texts into paragraphs, sentences and tokens
 * using the OpenNLP tokenizer and sentence detector, and then mirrors the
 * resulting document structure into {@link Paragraph}, {@link Sentence} and
 * {@link Token} objects attached to the text.
 *
 * <p>The tokenizer and the sentence detector are created lazily on first use
 * and cached in static fields. No synchronization is performed around that
 * lazy creation.
 */
public class OpennlpSplitter {
    /** Cached tokenizer, created on first use by {@link #getTokeniser()}. */
    private static TokenizerME tokeniser = null;
    /** Cached sentence detector, created on first use by {@link #getSentenceDetector()}. */
    private static EnglishSentenceDetectorME englishSentenceDetector = null;

    /**
     * Splits the values of the given attributes in every case. Values that
     * are not {@link IETextOpenNLP} instances are skipped. Progress is
     * reported through {@link ProgressController}, one step per case.
     *
     * @param cases      cases whose attribute values are processed
     * @param attributes attributes to look up in each case
     */
    public static void split(Collection<CBRCase> cases, Collection<Attribute> attributes) {
        LogFactory.getLog(OpennlpSplitter.class).info("Splitting OpenNLP text.");
        ProgressController.init(OpennlpSplitter.class, "Splitting OpenNLP text", cases.size());
        for (CBRCase c : cases) {
            for (Attribute a : attributes) {
                Object value = AttributeUtils.findValue(a, c);
                if (value instanceof IETextOpenNLP) {
                    OpennlpSplitter.split((IETextOpenNLP) value);
                }
            }
            ProgressController.step(OpennlpSplitter.class);
        }
        ProgressController.finish(OpennlpSplitter.class);
    }

    /**
     * Splits the values of the given attributes in a query. Values that are
     * not {@link IETextOpenNLP} instances are skipped.
     *
     * @param query      query whose attribute values are processed
     * @param attributes attributes to look up in the query
     */
    public static void split(CBRQuery query, Collection<Attribute> attributes) {
        LogFactory.getLog(OpennlpSplitter.class).info("Splitting OpenNLP text.");
        for (Attribute a : attributes) {
            Object value = AttributeUtils.findValue(a, query);
            if (value instanceof IETextOpenNLP) {
                OpennlpSplitter.split((IETextOpenNLP) value);
            }
        }
    }

    /**
     * Splits every {@link IETextOpenNLP} returned by
     * {@link IEutils#getTexts} for each case. Progress is reported through
     * {@link ProgressController}, one step per case.
     *
     * @param cases cases whose texts are processed
     */
    public static void split(Collection<CBRCase> cases) {
        LogFactory.getLog(OpennlpSplitter.class).info("Splitting OpenNLP text.");
        ProgressController.init(OpennlpSplitter.class, "Splitting OpenNLP text", cases.size());
        for (CBRCase c : cases) {
            Collection<IEText> texts = IEutils.getTexts(c);
            for (IEText t : texts) {
                if (t instanceof IETextOpenNLP) {
                    OpennlpSplitter.split((IETextOpenNLP) t);
                }
            }
            ProgressController.step(OpennlpSplitter.class);
        }
        ProgressController.finish(OpennlpSplitter.class);
    }

    /**
     * Splits every {@link IETextOpenNLP} returned by
     * {@link IEutils#getTexts} for the query.
     *
     * @param query query whose texts are processed
     */
    public static void split(CBRQuery query) {
        LogFactory.getLog(OpennlpSplitter.class).info("Splitting OpenNLP text.");
        Collection<IEText> texts = IEutils.getTexts(query);
        for (IEText t : texts) {
            if (t instanceof IETextOpenNLP) {
                OpennlpSplitter.split((IETextOpenNLP) t);
            }
        }
    }

    /**
     * Runs the tokenizer and then the sentence detector over the text's
     * document, and finally organizes the result via
     * {@link #organizeText(IETextOpenNLP)}.
     *
     * <p>Any exception raised while processing is caught and logged at error
     * level (with its stack trace); it is not propagated to the caller.
     *
     * @param text text to split
     */
    public static void split(IETextOpenNLP text) {
        try {
            TokenizerME tokenizer = OpennlpSplitter.getTokeniser();
            tokenizer.process(text.getDocument());
            EnglishSentenceDetectorME sentenceDetector = OpennlpSplitter.getSentenceDetector();
            sentenceDetector.process(text.getDocument());
            OpennlpSplitter.organizeText(text);
        }
        catch (Exception e) {
            // Log under this class (not GateSplitter) and pass the exception as the
            // Throwable argument so the stack trace is preserved.
            LogFactory.getLog(OpennlpSplitter.class).error("Error splitting OpenNLP text: " + e, e);
        }
    }

    /**
     * Walks the children of the document's "text" element and builds the
     * paragraph / sentence / token objects for the given text, registering
     * the mapping between each created object and its document element.
     *
     * <p>NOTE(review): elements are paired with the strings returned by
     * {@code getParagraphs()}, {@code getSentences(..)} and
     * {@code getWords(..)} purely by index; this assumes each element's
     * child list lines up one-to-one with the corresponding string array.
     *
     * @param text text whose already-processed document is organized
     */
    protected static void organizeText(IETextOpenNLP text) {
        NLPDocument doc = text.getDocument();
        Element root = doc.getRootElement();
        Element textElement = root.getChild("text");
        List<?> paragraphElements = textElement.getChildren();
        String[] paragraphTexts = doc.getParagraphs();
        for (int p = 0; p < paragraphTexts.length; ++p) {
            Element paragraphElement = (Element) paragraphElements.get(p);
            Paragraph paragraph = new Paragraph(paragraphTexts[p]);
            text.setParagraphMapping(paragraph, paragraphElement);
            text.addParagraph(paragraph);

            List<?> sentenceElements = paragraphElement.getChildren();
            String[] sentenceTexts = doc.getSentences(paragraphElement);
            for (int s = 0; s < sentenceTexts.length; ++s) {
                Element sentenceElement = (Element) sentenceElements.get(s);
                Sentence sentence = new Sentence(sentenceTexts[s]);
                paragraph.addSentence(sentence);
                text.setSentenceMapping(sentence, sentenceElement);

                List<?> tokenElements = sentenceElement.getChildren();
                String[] tokenTexts = doc.getWords(sentenceElement);
                for (int t = 0; t < tokenTexts.length; ++t) {
                    Element tokenElement = (Element) tokenElements.get(t);
                    Token token = new Token(tokenTexts[t]);
                    sentence.addToken(token);
                    text.setTokenMapping(token, tokenElement);
                }
            }
        }
    }

    /**
     * Returns the cached tokenizer, creating an {@link EnglishTokenizerME}
     * on the first call.
     *
     * @return the shared tokenizer instance
     * @throws Exception declared by the original signature; kept for compatibility
     */
    private static TokenizerME getTokeniser() throws Exception {
        if (tokeniser == null) {
            tokeniser = new EnglishTokenizerME();
        }
        return tokeniser;
    }

    /**
     * Returns the cached sentence detector, creating an
     * {@link EnglishSentenceDetectorME} on the first call.
     *
     * @return the shared sentence detector instance
     */
    private static EnglishSentenceDetectorME getSentenceDetector() {
        if (englishSentenceDetector == null) {
            englishSentenceDetector = new EnglishSentenceDetectorME();
        }
        return englishSentenceDetector;
    }
}

