/*
 * Decompiled with CFR 0.152.
 */
package dragon.ml.seqmodel.data;

import dragon.ml.seqmodel.data.BasicDataSequence;
import dragon.ml.seqmodel.data.BasicDataset;
import dragon.ml.seqmodel.data.BasicToken;
import dragon.ml.seqmodel.data.DataReader;
import dragon.ml.seqmodel.data.DataSequence;
import dragon.ml.seqmodel.data.Dataset;
import dragon.ml.seqmodel.data.LabelConverter;
import dragon.util.FileUtil;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

/**
 * Reads token sequences from a flat text file, one {@link DataSequence} per call to
 * {@link #readRow()}.
 *
 * <p>Three input layouts are handled:
 * <ul>
 *   <li><b>Tagged, variable-column</b>: each line is {@code text|label}; a line with fewer
 *       than two {@code |}-separated fields (or end of file) terminates the current sequence.</li>
 *   <li><b>Tagged, fixed-column</b>: the file starts with a line beginning with
 *       {@code fixed-column-format}, followed by a line of {@code |}-separated integer labels
 *       (one per column). Every following line is one sequence whose columns are
 *       {@code |}-separated segments.</li>
 *   <li><b>Raw</b>: each line is one untagged sequence split on spaces and tabs.</li>
 * </ul>
 *
 * <p>All text is lower-cased before tokenization.
 */
public class FlatSegmentReader implements DataReader {
    /** First-line prefix (compared case-insensitively) announcing the fixed-column layout. */
    private static final String FIXED_COLUMN_HEADER = "fixed-column-format";

    private final int originalLabelNum;
    private final int markovOrder;
    /** Characters that separate tokens inside a segment. */
    private final String delimit;
    /** Characters that separate a segment from its label (or from the next column). */
    private final String tagDelimit;
    /** Delimiter characters that are kept as tokens instead of being dropped. */
    private final String impDelimit;
    private final BufferedReader tin;
    private final LabelConverter labelConverter;
    /** Per-column labels from the fixed-column header, or {@code null} if there is none. */
    private final int[] labels;
    boolean fixedColFormat;
    boolean tagged;

    /**
     * Creates a reader for a tagged file. The layout (fixed or variable column) is detected
     * from the first line of the file.
     *
     * @param originalLabelNum number of labels; also the number of header columns read
     * @param markovOrder      passed through to the {@link BasicDataset} built by {@link #read()}
     * @param taggedFile       name of the tagged input file
     * @param labelConverter   converts file labels to internal labels; may be {@code null}, in
     *                         which case labels are used as integers directly
     */
    public FlatSegmentReader(int originalLabelNum, int markovOrder, String taggedFile, LabelConverter labelConverter) {
        this.originalLabelNum = originalLabelNum;
        this.markovOrder = markovOrder;
        this.tin = FileUtil.getTextReader(taggedFile);
        this.labelConverter = labelConverter;
        this.delimit = ",\t/ -():.;'?\\#`&\"_";
        this.tagDelimit = "|";
        this.impDelimit = ",";
        this.labels = this.readHeaderInfo(this.tin);
        this.fixedColFormat = this.labels != null;
        this.tagged = true;
    }

    /**
     * Creates a reader for a raw (untagged) file in which every line is one sequence.
     *
     * @param originalLabelNum number of labels
     * @param markovOrder      passed through to the {@link BasicDataset} built by {@link #read()}
     * @param rawFile          name of the raw input file
     */
    public FlatSegmentReader(int originalLabelNum, int markovOrder, String rawFile) {
        this.originalLabelNum = originalLabelNum;
        this.markovOrder = markovOrder;
        this.tin = FileUtil.getTextReader(rawFile);
        this.labelConverter = null;
        this.delimit = " \t";
        this.tagDelimit = "|";
        this.impDelimit = "";
        this.labels = null;
        this.tagged = false;
        this.fixedColFormat = false;
    }

    /**
     * Reads sequences until {@link #readRow()} returns {@code null} or an empty sequence.
     *
     * @return a dataset holding every sequence read before the stop condition
     */
    public Dataset read() {
        BasicDataset dataset = new BasicDataset(this.originalLabelNum, this.markovOrder);
        DataSequence dataSeq;
        while ((dataSeq = this.readRow()) != null && dataSeq.length() != 0) {
            dataset.add(dataSeq);
        }
        return dataset;
    }

    /**
     * Reads the next sequence using the layout selected at construction time.
     *
     * @return the next sequence; {@code null} if reading failed (the stack trace is printed)
     *         or, for the fixed-column and raw layouts, at end of file. The variable-column
     *         layout returns an empty sequence at end of file.
     */
    public DataSequence readRow() {
        try {
            if (!this.tagged) {
                return this.readRaw();
            }
            if (this.fixedColFormat) {
                return this.readRowFixedCol(this.tin, this.labels);
            }
            return this.readRowVarCol(this.tin);
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /** Closes the underlying reader; any failure is printed and otherwise ignored. */
    public void close() {
        try {
            this.tin.close();
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Tells whether a token produced by the tokenizer is a delimiter that must be dropped,
     * i.e. it is one of {@code delimit} but not one of {@code impDelimit}.
     */
    private boolean isDroppedDelimiter(String tokStr) {
        return this.delimit.indexOf(tokStr) != -1 && this.impDelimit.indexOf(tokStr) == -1;
    }

    /**
     * Lower-cases {@code text2} and splits it on {@code delimit}, keeping delimiter characters
     * listed in {@code impDelimit} as tokens of their own.
     *
     * @param text2 the text to tokenize
     * @return the tokens in order of appearance; empty if there are none
     */
    private String[] getTokenList(String text2) {
        List<String> tokens = new ArrayList<String>();
        // returnDelims=true so that "important" delimiters can be emitted as tokens.
        StringTokenizer textTok = new StringTokenizer(text2.toLowerCase(), this.delimit, true);
        while (textTok.hasMoreTokens()) {
            String tokStr = textTok.nextToken();
            if (this.isDroppedDelimiter(tokStr)) {
                continue;
            }
            tokens.add(tokStr);
        }
        return tokens.toArray(new String[0]);
    }

    /**
     * Tokenizes {@code text} and appends every token to {@code dataSeq} with the given label.
     * Only the first token of the segment gets its segment marker set to {@code true}.
     */
    private void addSegment(BasicDataSequence dataSeq, String text, int label) {
        String[] arrToken = this.getTokenList(text);
        for (int t = 0; t < arrToken.length; ++t) {
            BasicToken token = new BasicToken(arrToken[t], label);
            token.setSegmentMarker(t == 0);
            dataSeq.add(token);
        }
    }

    /**
     * Reads one sequence in the variable-column layout: consecutive {@code text|label} lines,
     * terminated by end of file or by a line with fewer than two fields.
     *
     * @param tin the reader to consume lines from
     * @return the sequence read, possibly empty; never {@code null}
     * @throws IOException if reading a line fails
     */
    private DataSequence readRowVarCol(BufferedReader tin) throws IOException {
        BasicDataSequence dataSeq = new BasicDataSequence();
        String line;
        while ((line = tin.readLine()) != null) {
            StringTokenizer firstSplit = new StringTokenizer(line.toLowerCase(), this.tagDelimit);
            if (firstSplit.countTokens() < 2) {
                // Separator (or malformed) line: the current sequence ends here.
                break;
            }
            String w = firstSplit.nextToken();
            String tag = firstSplit.nextToken();
            int label = this.labelConverter != null ? this.labelConverter.getInternalLabel(tag) : Integer.parseInt(tag);
            this.addSegment(dataSeq, w, label);
        }
        return dataSeq;
    }

    /**
     * Reads one sequence in the fixed-column layout: a single line whose {@code |}-separated
     * columns are labelled by the corresponding entry of {@code labels}. Columns whose
     * (converted) label falls outside {@code [0, originalLabelNum)} are skipped.
     *
     * @param tin    the reader to consume the line from
     * @param labels label of each column, in column order
     * @return the sequence read, or {@code null} at end of file
     * @throws IOException if reading the line fails
     */
    private DataSequence readRowFixedCol(BufferedReader tin, int[] labels) throws IOException {
        String line = tin.readLine();
        if (line == null) {
            return null;
        }
        BasicDataSequence dataSeq = new BasicDataSequence();
        // returnDelims=true so that an empty column ("a||b") still advances the column index.
        StringTokenizer firstSplit = new StringTokenizer(line.toLowerCase(), this.tagDelimit, true);
        for (int col = 0; col < labels.length && firstSplit.hasMoreTokens(); ++col) {
            int label = this.labelConverter != null ? this.labelConverter.getInternalLabel(labels[col]) : labels[col];
            String w = firstSplit.nextToken();
            if (this.tagDelimit.indexOf(w) != -1) {
                // A bare delimiter means this column is empty.
                continue;
            }
            if (firstSplit.hasMoreTokens()) {
                // Skip the delimiter that closes this column.
                firstSplit.nextToken();
            }
            if (label >= 0 && label < this.originalLabelNum) {
                // The token loop lives in addSegment with its own index, so the column
                // index is no longer clobbered while the segment's tokens are added.
                this.addSegment(dataSeq, w, label);
            }
        }
        return dataSeq;
    }

    /**
     * Detects the fixed-column header. If the first line starts with
     * {@code fixed-column-format} (case-insensitive), the second line is parsed as up to
     * {@code originalLabelNum} integer column labels separated by {@code |}; otherwise the
     * reader is rewound to the start of the file.
     *
     * <p>The rewind relies on a mark with a read-ahead limit of 1000 characters, so a very
     * long first line may make the reset fail; that failure is reported like any other.
     *
     * @param tin the reader positioned at the start of the file
     * @return the column labels (unfilled entries stay 0), or {@code null} if the file has no
     *         fixed-column header, the label line is missing, or the header cannot be read
     *         (in the last case the stack trace is printed)
     */
    private int[] readHeaderInfo(BufferedReader tin) {
        try {
            tin.mark(1000);
            String line = tin.readLine();
            if (line == null) {
                return null;
            }
            if (!line.toLowerCase().startsWith(FIXED_COLUMN_HEADER)) {
                tin.reset();
                return null;
            }
            line = tin.readLine();
            if (line == null) {
                // Header announced but the label line is missing.
                return null;
            }
            StringTokenizer firstSplit = new StringTokenizer(line, this.tagDelimit);
            int[] labels = new int[this.originalLabelNum];
            int i = 0;
            while (i < this.originalLabelNum && firstSplit.hasMoreTokens()) {
                // trim() tolerates padding around the numbers, e.g. "0 | 1 | 2".
                labels[i++] = Integer.parseInt(firstSplit.nextToken().trim());
            }
            return labels;
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Reads one line of a raw file and turns it into an unlabelled sequence.
     *
     * @return the sequence for the next line (empty for a blank line), or {@code null} at end
     *         of file
     * @throws IOException if reading the line fails
     */
    private DataSequence readRaw() throws IOException {
        String line = this.tin.readLine();
        if (line == null) {
            return null;
        }
        BasicDataSequence dataSeq = new BasicDataSequence();
        String[] tokens = this.getTokenList(line);
        for (int t = 0; t < tokens.length; ++t) {
            dataSeq.add(new BasicToken(tokens[t]));
        }
        return dataSeq;
    }
}

