/*
 * Decompiled with CFR 0.152.
 */
package jcolibri.extensions.textual.IE.common;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jcolibri.cbrcore.Attribute;
import jcolibri.cbrcore.CBRCase;
import jcolibri.cbrcore.CBRQuery;
import jcolibri.extensions.textual.IE.IEutils;
import jcolibri.extensions.textual.IE.common.PhrasesExtractor;
import jcolibri.extensions.textual.IE.gate.GatePhrasesExtractor;
import jcolibri.extensions.textual.IE.representation.IEText;
import jcolibri.extensions.textual.IE.representation.info.FeatureInfo;
import jcolibri.util.AttributeUtils;
import jcolibri.util.FileIO;
import jcolibri.util.ProgressController;
import org.apache.commons.logging.LogFactory;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class FeaturesExtractor {
    static ArrayList<FeatureRule> featuresRules;

    public static void extractFeatures(Collection<CBRCase> cases, Collection<Attribute> attributes) {
        LogFactory.getLog(FeaturesExtractor.class).info((Object)"Extracting features.");
        ProgressController.init(PhrasesExtractor.class, "Extracting features ...", cases.size());
        for (CBRCase c : cases) {
            for (Attribute a : attributes) {
                Object o = AttributeUtils.findValue(a, c);
                FeaturesExtractor.extractFeatures((IEText)o);
            }
            ProgressController.step(GatePhrasesExtractor.class);
        }
        ProgressController.finish(GatePhrasesExtractor.class);
    }

    public static void extractFeatures(CBRQuery query, Collection<Attribute> attributes) {
        LogFactory.getLog(FeaturesExtractor.class).info((Object)"Extracting features.");
        for (Attribute a : attributes) {
            Object o = AttributeUtils.findValue(a, query);
            FeaturesExtractor.extractFeatures((IEText)o);
        }
    }

    public static void extractFeatures(Collection<CBRCase> cases) {
        LogFactory.getLog(FeaturesExtractor.class).info((Object)"Extracting features.");
        ProgressController.init(PhrasesExtractor.class, "Extracting features ...", cases.size());
        for (CBRCase c : cases) {
            Collection<IEText> texts = IEutils.getTexts(c);
            for (IEText t : texts) {
                FeaturesExtractor.extractFeatures(t);
            }
            ProgressController.step(GatePhrasesExtractor.class);
        }
        ProgressController.finish(GatePhrasesExtractor.class);
    }

    public static void extractFeatures(CBRQuery query) {
        LogFactory.getLog(FeaturesExtractor.class).info((Object)"Extracting features.");
        Collection<IEText> texts = IEutils.getTexts(query);
        for (IEText t : texts) {
            FeaturesExtractor.extractFeatures(t);
        }
    }

    public static void extractFeatures(IEText text) {
        String rawText = text.getRAWContent();
        for (FeatureRule rule : featuresRules) {
            Matcher m = rule._pattern.matcher(rawText);
            while (m.find()) {
                String group = m.group(rule._group);
                group = FeaturesExtractor.cleanSpaces(group);
                text.addFeature(new FeatureInfo(rule._feature, group, m.start(), m.end()));
            }
        }
    }

    private static String cleanSpaces(String w) {
        String res = "";
        StringTokenizer st = new StringTokenizer(w, " ");
        while (st.hasMoreTokens()) {
            res = String.valueOf(res) + st.nextToken();
            if (!st.hasMoreTokens()) continue;
            res = String.valueOf(res) + " ";
        }
        return res;
    }

    public static void loadRules(String filename) {
        try {
            featuresRules = new ArrayList();
            URL file = FileIO.findFile(filename);
            BufferedReader br = new BufferedReader(new InputStreamReader(file.openStream()));
            String line = "";
            while ((line = br.readLine()) != null) {
                if (line.startsWith("#")) continue;
                int pos = line.indexOf(93);
                if (pos == -1) {
                    throw new Exception(String.valueOf(line) + "  Feature field not found");
                }
                String _feature = line.substring(1, pos);
                String _rest = line.substring(pos + 1);
                if ((pos = _rest.indexOf(125)) == -1) {
                    throw new Exception(String.valueOf(line) + "  FeaturePostion field not found");
                }
                String _group = _rest.substring(1, pos);
                String _rule = _rest.substring(pos + 1);
                int g = Integer.parseInt(_group);
                featuresRules.add(new FeatureRule(_feature, Pattern.compile(_rule), g));
            }
            br.close();
        }
        catch (Exception e) {
            LogFactory.getLog(FeaturesExtractor.class).error((Object)e);
        }
    }

    private static class FeatureRule {
        String _feature;
        Pattern _pattern;
        int _group;

        FeatureRule(String _f, Pattern _p, int _g) {
            this._feature = _f;
            this._pattern = _p;
            this._group = _g;
        }
    }
}

