/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.share.upenn.ner;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import gnu.trove.THashSet;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Serializable;
import java.util.Set;
import java.util.StringTokenizer;

/**
 * Pipe that sets a binary feature on every token that is part of a lexicon entry.
 *
 * <p>The lexicon is read from a text file, one entry per line (blank lines are
 * skipped, surrounding whitespace is trimmed). In {@link #pipe(Instance)}, each
 * window of consecutive tokens is concatenated twice -- once with no separator and
 * once with a single space between tokens -- and both forms are looked up in the
 * lexicon. Every token inside a matching window gets the feature {@code name} set
 * to {@code 1.0}.
 *
 * <p>{@code min} and {@code max} bound the window lengths (in tokens) that are
 * tried; they are derived from the lexicon entries while loading.
 */
public class ListMember
extends Pipe
implements Serializable {
    /** Name of the feature set on tokens covered by a lexicon match. */
    String name;
    /** Lexicon entries (lower-cased when {@code ignoreCase} is true). */
    Set lexicon;
    /** Whether lexicon entries and token text are lower-cased before comparison. */
    boolean ignoreCase;
    /** Smallest window length (in tokens) worth testing against the lexicon. */
    int min;
    /** Largest window length (in tokens) tested against the lexicon. */
    int max;

    /**
     * Loads the lexicon from {@code lexFile}.
     *
     * @param featureName name of the feature to set on matching tokens
     * @param lexFile     text file holding one lexicon entry per line
     * @param ignoreCase  if true, entries are stored lower-cased and token text is
     *                    lower-cased before lookup
     * @throws IllegalArgumentException if {@code lexFile} does not exist or cannot
     *                                  be read
     */
    public ListMember(String featureName, File lexFile, boolean ignoreCase) {
        this.name = featureName;
        this.ignoreCase = ignoreCase;
        if (!lexFile.exists()) {
            throw new IllegalArgumentException("File " + lexFile + " not found.");
        }
        this.lexicon = new THashSet();
        this.min = 99999;
        this.max = -1;
        // try-with-resources guarantees the file handle is released even on failure.
        try (BufferedReader br = new BufferedReader(new FileReader(lexFile))) {
            String line;
            // readLine() returning null is the reliable end-of-file signal;
            // ready() only reports whether a read would block.
            while ((line = br.readLine()) != null) {
                String entry = line.trim();
                if (entry.equals("")) {
                    continue;
                }
                // Lower bound: whitespace separators are not tokens in the
                // sequence seen by pipe(), so they must not raise min; otherwise
                // multi-word entries could be skipped as "too short".
                int lower = this.countNonWhitespaceTokens(entry);
                if (lower < this.min) {
                    this.min = lower;
                }
                // Upper bound: keep the whitespace-inclusive count so the search
                // window is never narrower than it was before.
                int upper = this.countTokens(entry);
                if (upper > this.max) {
                    this.max = upper;
                }
                this.lexicon.add(ignoreCase ? entry.toLowerCase() : entry);
            }
        }
        catch (IOException e) {
            // Fail loudly with the cause attached instead of terminating the JVM
            // with a success status from inside a constructor.
            throw new IllegalArgumentException("Problem with " + lexFile + ": " + e, e);
        }
    }

    /**
     * Marks every token of {@code carrier}'s token sequence that lies inside a
     * window whose concatenated text (with or without single-space separators) is
     * a lexicon entry, by setting feature {@code name} to {@code 1.0}.
     *
     * @param carrier instance whose data is cast to a {@code TokenSequence}
     * @return the same {@code carrier}, with features added to its tokens
     */
    @Override
    public Instance pipe(Instance carrier) {
        TokenSequence seq = (TokenSequence)carrier.getData();
        int size = seq.size();
        boolean[] marked = new boolean[size];
        for (int start = 0; start < size; ++start) {
            // Two running concatenations of the window start..end:
            // one with tokens glued together, one joined by single spaces.
            StringBuilder glued = new StringBuilder();
            StringBuilder spaced = new StringBuilder();
            for (int end = start; end < start + this.max && end < size; ++end) {
                String text = ((Token)seq.get(end)).getText();
                glued.append(text);
                if (spaced.length() > 0) {
                    spaced.append(' ');
                }
                spaced.append(text);
                // Windows shorter than the shortest lexicon entry cannot match.
                if (end - start + 1 < this.min) {
                    continue;
                }
                String gluedKey = this.ignoreCase ? glued.toString().toLowerCase() : glued.toString();
                String spacedKey = this.ignoreCase ? spaced.toString().toLowerCase() : spaced.toString();
                if (this.lexicon.contains(gluedKey) || this.lexicon.contains(spacedKey)) {
                    this.markFrom(start, end, marked);
                }
            }
        }
        for (int i = 0; i < size; ++i) {
            if (marked[i]) {
                ((Token)seq.get(i)).setFeatureValue(this.name, 1.0);
            }
        }
        return carrier;
    }

    /** Sets {@code marked[a..b]} (both ends inclusive) to true. */
    private void markFrom(int a, int b, boolean[] marked) {
        for (int i = a; i <= b; ++i) {
            marked[i] = true;
        }
    }

    /**
     * Builds the tokenizer used to measure lexicon entries: it splits on
     * punctuation and whitespace and returns each delimiter as its own token.
     */
    private StringTokenizer newTokenizer(String s) {
        return new StringTokenizer(s, "~`!@#$%^&*()_-+={[}]|\\:;\"',<.>?/ \t\n\r", true);
    }

    /** Counts all tokens of {@code s}, delimiters (including whitespace) included. */
    private int countTokens(String s) {
        return this.newTokenizer(s).countTokens();
    }

    /**
     * Counts the tokens of {@code s} like {@link #countTokens(String)} but leaves
     * out tokens that consist only of whitespace.
     */
    private int countNonWhitespaceTokens(String s) {
        StringTokenizer tokens = this.newTokenizer(s);
        int count = 0;
        while (tokens.hasMoreTokens()) {
            if (tokens.nextToken().trim().length() > 0) {
                ++count;
            }
        }
        return count;
    }
}

