/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.extract.test;

import cc.mallet.extract.BIOTokenizationFilter;
import cc.mallet.extract.DefaultTokenizationFilter;
import cc.mallet.extract.DocumentExtraction;
import cc.mallet.extract.HierarchicalTokenizationFilter;
import cc.mallet.extract.LabeledSpan;
import cc.mallet.extract.LabeledSpans;
import cc.mallet.extract.Span;
import cc.mallet.extract.StringTokenization;
import cc.mallet.extract.Tokenization;
import cc.mallet.types.Label;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelSequence;
import cc.mallet.util.CharSequenceLexer;
import java.util.regex.Pattern;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;

/**
 * JUnit 3 tests that compare the string returned by
 * {@code DocumentExtraction.toXmlString()} against a fixed expected document.
 *
 * <p>Every test tokenizes the same nine-word sentence, assigns one label per
 * token, builds a {@code DocumentExtraction} from those labels and checks the
 * rendered XML. In all expected outputs, tokens labelled {@code "O"} appear
 * without an enclosing element.
 */
public class TestDocumentExtraction extends TestCase {
    /** Sentence shared by all tests; the tag arrays below supply nine labels for it. */
    private static final String SENTENCE = "the quick brown fox leapt over the lazy dog";

    /** First line of every expected document, including its CRLF terminator. */
    private static final String XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n";

    /**
     * Creates a test case bound to the test method with the given name.
     *
     * @param name name of the test method to run
     */
    public TestDocumentExtraction(String name) {
        super(name);
    }

    /**
     * Builds a suite from this class.
     *
     * @return a {@code TestSuite} constructed from {@code TestDocumentExtraction.class}
     */
    public static Test suite() {
        return new TestSuite(TestDocumentExtraction.class);
    }

    /** Tokenizes {@link #SENTENCE} with a default-constructed {@code CharSequenceLexer}. */
    private static StringTokenization tokenizeSentence() {
        return new StringTokenization(SENTENCE, new CharSequenceLexer());
    }

    /** Asserts that the extraction's XML string equals {@code expectedXml}. */
    private static void assertRendersAs(String expectedXml, DocumentExtraction extraction) {
        assertEquals(expectedXml, extraction.toXmlString());
    }

    /**
     * Plain per-token labels: consecutive tokens with the same non-"O" label are
     * expected to be wrapped together in a single element.
     */
    public void testToXml() {
        LabelAlphabet labels = new LabelAlphabet();
        StringTokenization tokens = tokenizeSentence();

        // Lookup order is kept as O, ANIMAL, VERB.
        Label outside = labels.lookupLabel("O");
        Label animal = labels.lookupLabel("ANIMAL");
        Label verb = labels.lookupLabel("VERB");

        Label[] perToken = {outside, animal, animal, animal, verb, outside, outside, animal, animal};
        LabelSequence tags = new LabelSequence(perToken);

        DocumentExtraction extraction = new DocumentExtraction("Test", labels, tokens, tags, "O");

        assertRendersAs(
            XML_DECLARATION
                + "<doc>the <ANIMAL>quick brown fox </ANIMAL><VERB>leapt </VERB>over the <ANIMAL>lazy dog</ANIMAL></doc>\r\n",
            extraction);
    }

    /**
     * Labels carrying {@code B-}/{@code I-} prefixes run through a
     * {@code BIOTokenizationFilter}: the expected output starts a new element at
     * each {@code B-} label and shows the element names without the prefix.
     */
    public void testToXmlBIO() {
        LabelAlphabet labels = new LabelAlphabet();
        StringTokenization tokens = tokenizeSentence();

        // Lookup order is kept as O, B-ANIMAL, ANIMAL, B-VERB, I-VERB.
        Label outside = labels.lookupLabel("O");
        Label beginAnimal = labels.lookupLabel("B-ANIMAL");
        Label animal = labels.lookupLabel("ANIMAL");
        Label beginVerb = labels.lookupLabel("B-VERB");
        Label insideVerb = labels.lookupLabel("I-VERB");

        Label[] perToken = {
            outside, beginAnimal, animal, beginAnimal, beginVerb, insideVerb, outside, animal, animal
        };
        LabelSequence tags = new LabelSequence(perToken);

        DocumentExtraction extraction =
            new DocumentExtraction("Test", labels, tokens, tags, null, "O", new BIOTokenizationFilter());

        assertRendersAs(
            XML_DECLARATION
                + "<doc>the <ANIMAL>quick brown </ANIMAL><ANIMAL>fox </ANIMAL><VERB>leapt over </VERB>the <ANIMAL>lazy dog</ANIMAL></doc>\r\n",
            extraction);
    }

    /**
     * Spans produced by a {@code DefaultTokenizationFilter} plus two single-token
     * spans added by hand; the expected output nests the added spans inside the
     * enclosing ANIMAL elements.
     */
    public void testNestedToXML() {
        LabelAlphabet labels = new LabelAlphabet();
        StringTokenization tokens = tokenizeSentence();

        // Lookup order is kept as O, ANIMAL, VERB, ADJ, MAMMAL.
        Label outside = labels.lookupLabel("O");
        Label animal = labels.lookupLabel("ANIMAL");
        Label verb = labels.lookupLabel("VERB");
        Label adjective = labels.lookupLabel("ADJ");
        Label mammal = labels.lookupLabel("MAMMAL");

        Label[] perToken = {outside, animal, animal, animal, verb, outside, animal, animal, animal};
        LabelSequence tags = new LabelSequence(perToken);

        LabeledSpans spans =
            new DefaultTokenizationFilter().constructLabeledSpans(labels, SENTENCE, outside, tokens, tags);

        // subspan(3, 4) is rendered as "fox " and subspan(7, 8) as "lazy " in the expected XML.
        spans.add(new LabeledSpan(tokens.subspan(3, 4), mammal, false));
        spans.add(new LabeledSpan(tokens.subspan(7, 8), adjective, false));

        DocumentExtraction extraction =
            new DocumentExtraction("Test", labels, (Tokenization) tokens, spans, null, "O");

        assertRendersAs(
            XML_DECLARATION
                + "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy </ADJ>dog</ANIMAL></doc>\r\n",
            extraction);
    }

    /**
     * Labels whose names join several levels with {@code |}, run through a
     * {@code HierarchicalTokenizationFilter}: first with the no-argument filter,
     * then with a filter built from the pattern {@code AD.*}, for which the
     * expected output contains no ADJ element.
     */
    public void testNestedXMLTokenizationFilter() {
        LabelAlphabet labels = new LabelAlphabet();
        StringTokenization tokens = tokenizeSentence();

        // Lookup order is kept as O, ANIMAL, ANIMAL|MAMMAL, VERB, ANIMAL|ADJ, ANIMAL|ADJ|MAMMAL.
        Label outside = labels.lookupLabel("O");
        Label animal = labels.lookupLabel("ANIMAL");
        Label animalMammal = labels.lookupLabel("ANIMAL|MAMMAL");
        Label verb = labels.lookupLabel("VERB");
        Label animalAdj = labels.lookupLabel("ANIMAL|ADJ");
        Label animalAdjMammal = labels.lookupLabel("ANIMAL|ADJ|MAMMAL");

        Label[] perToken = {
            outside, animal, animal, animalMammal, verb, outside, animal, animalAdj, animalAdjMammal
        };
        LabelSequence tags = new LabelSequence(perToken);

        DocumentExtraction fullyNested =
            new DocumentExtraction("Test", labels, tokens, tags, null, "O", new HierarchicalTokenizationFilter());
        assertRendersAs(
            XML_DECLARATION
                + "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the <ADJ>lazy <MAMMAL>dog</MAMMAL></ADJ></ANIMAL></doc>\r\n",
            fullyNested);

        DocumentExtraction withPattern =
            new DocumentExtraction(
                "Test", labels, tokens, tags, null, "O",
                new HierarchicalTokenizationFilter(Pattern.compile("AD.*")));
        assertRendersAs(
            XML_DECLARATION
                + "<doc>the <ANIMAL>quick brown <MAMMAL>fox </MAMMAL></ANIMAL><VERB>leapt </VERB>over <ANIMAL>the lazy <MAMMAL>dog</MAMMAL></ANIMAL></doc>\r\n",
            withPattern);
    }

    /**
     * Command-line entry point. With no arguments the whole suite is run;
     * otherwise each argument is used as the name of one test case to run.
     *
     * @param args optional test method names
     * @throws Throwable declared by the original signature
     */
    public static void main(String[] args) throws Throwable {
        TestSuite selected;
        if (args.length == 0) {
            selected = (TestSuite) TestDocumentExtraction.suite();
        } else {
            selected = new TestSuite();
            for (String testName : args) {
                selected.addTest(new TestDocumentExtraction(testName));
            }
        }
        TestRunner.run((Test) selected);
    }
}

