/*
 * Decompiled with CFR 0.152.
 */
package dragon.onlinedb.trec;

import dragon.nlp.Token;
import dragon.onlinedb.Article;
import dragon.onlinedb.ArticleParser;
import dragon.onlinedb.BasicArticle;
import dragon.util.SortedArray;
import java.util.Date;

/**
 * Article parser for SGML-style marked-up documents (the package name suggests TREC
 * collections). A document is scanned once to index its opening tags, and the
 * individual article fields are then cut out of the raw text by tag name.
 *
 * <p>The tag index is kept in the {@link #tagList} instance field and is overwritten by
 * every call to {@link #parse(String)}, so an instance must not be shared between
 * threads that parse concurrently.
 */
public class SgmArticleParser
implements ArticleParser {
    /** Characters accepted as the final character of a finished sentence. */
    private static final String SENTENCE_END = ".!;?";
    /** Titles and abstracts shorter than this many characters are discarded. */
    private static final int MIN_HEADING_LENGTH = 5;
    /** A body must be longer than this many characters to be kept. */
    private static final int MIN_BODY_LENGTH = 40;
    /** Text fragments of at most this many characters are dropped by tag removal. */
    private static final int MIN_FRAGMENT_LENGTH = 10;
    /** Fragments longer than this get a trailing period when they lack one. */
    private static final int MIN_SENTENCE_LENGTH = 40;
    /** Size threshold and search window used by the "contains a sentence" heuristic. */
    private static final int SENTENCE_WINDOW = 400;

    /** Candidate title tags, in lookup priority order. */
    private static final String[] TITLE_TAGS = {"HEAD", "HEADLINE", "HL", "TITLE", "TI"};
    /** Candidate abstract tags, in lookup priority order. */
    private static final String[] ABSTRACT_TAGS = {"LP", "LEADPARA"};
    /** Candidate meta tags, in lookup priority order. */
    private static final String[] META_TAGS = {"DESCRIPT", "IN"};

    /** Opening tags of the document currently being parsed; rebuilt by each parse call. */
    protected SortedArray tagList;

    /**
     * Not supported by this parser.
     *
     * @param article the article to serialize (ignored)
     * @return always {@code null}
     */
    public String assemble(Article article) {
        return null;
    }

    /**
     * Parses one raw document into an article.
     *
     * @param content the raw marked-up text of a single document
     * @return the parsed article; {@code null} when no tags could be collected, or when
     *         parsing failed before a key had been assigned. If parsing fails after the
     *         key was set, the partially filled article is returned.
     */
    public Article parse(String content) {
        BasicArticle article = null;
        try {
            this.tagList = this.collectTagInformation(content);
            if (this.tagList == null || this.tagList.size() == 0) {
                return null;
            }
            article = new BasicArticle();
            article.setKey(this.extractDocNo(content));
            article.setTitle(this.extractTitle(content));
            article.setAbstract(this.extractAbstract(content));
            article.setMeta(this.extractMeta(content));
            article.setLength(this.extractLength(content));
            article.setDate(this.extractDate(content));
            article.setBody(this.extractBody(content));
            return article;
        }
        catch (Exception e) {
            e.printStackTrace();
            // The failure may have happened before the article was created, so guard
            // against null before salvaging a partially parsed article.
            if (article != null && article.getKey() != null) {
                return article;
            }
            return null;
        }
    }

    /**
     * Hook for subclasses; this implementation does not extract a length.
     *
     * @param rawText the raw document text (unused here)
     * @return always 0
     */
    protected int extractLength(String rawText) {
        return 0;
    }

    /**
     * Hook for subclasses; this implementation does not extract a date.
     *
     * @param rawText the raw document text (unused here)
     * @return always {@code null}
     */
    protected Date extractDate(String rawText) {
        return null;
    }

    /**
     * Extracts the document number (the trimmed, unprocessed DOCNO tag content).
     *
     * @param rawText the raw document text
     * @return the trimmed document number, or {@code null} when the document has no
     *         DOCNO tag or the tag is never closed
     */
    protected String extractDocNo(String rawText) {
        Token tag = this.getDocNoTag();
        if (tag == null) {
            return null;
        }
        String docNo = this.getTagContent(rawText, tag, false);
        // An unterminated DOCNO yields null content; report "no key" instead of
        // failing with a NullPointerException.
        return docNo == null ? null : docNo.trim();
    }

    /**
     * @return the indexed DOCNO tag, or {@code null} when the document has none
     */
    protected Token getDocNoTag() {
        return this.findFirstTag(new String[]{"DOCNO"});
    }

    /**
     * Extracts the title from the first available title tag.
     *
     * @param rawText the raw document text
     * @return the title, terminated with a period if it did not already end in
     *         sentence punctuation; {@code null} when there is no title tag or the
     *         extracted text is shorter than 5 characters
     */
    protected String extractTitle(String rawText) {
        Token tag = this.getTitleTag();
        if (tag == null) {
            return null;
        }
        StringBuffer out = new StringBuffer();
        this.getTagContent(rawText, tag.getName(), tag.getIndex(), out);
        if (tag.getName().equals("HL")) {
            // For HL headlines, everything from the first "----" run onwards is cut off.
            int rule = out.indexOf("----");
            if (rule >= 0) {
                out.delete(rule, out.length());
            }
        }
        return SgmArticleParser.closeSentence(out);
    }

    /**
     * @return the first indexed tag among HEAD, HEADLINE, HL, TITLE and TI (in that
     *         order), or {@code null} when none is present
     */
    protected Token getTitleTag() {
        return this.findFirstTag(TITLE_TAGS);
    }

    /**
     * Extracts the abstract from the first available abstract tag.
     *
     * @param rawText the raw document text
     * @return the abstract, terminated with a period if it did not already end in
     *         sentence punctuation; {@code null} when there is no abstract tag or the
     *         extracted text is shorter than 5 characters
     */
    protected String extractAbstract(String rawText) {
        Token tag = this.getAbstractTag();
        if (tag == null) {
            return null;
        }
        StringBuffer out = new StringBuffer();
        this.getTagContent(rawText, tag.getName(), tag.getIndex(), out);
        return SgmArticleParser.closeSentence(out);
    }

    /**
     * @return the first indexed tag among LP and LEADPARA (in that order), or
     *         {@code null} when neither is present
     */
    protected Token getAbstractTag() {
        return this.findFirstTag(ABSTRACT_TAGS);
    }

    /**
     * Extracts the meta information from the first available meta tag.
     *
     * @param rawText the raw document text
     * @return the non-empty meta text, or {@code null} when there is no meta tag or
     *         nothing was extracted
     */
    protected String extractMeta(String rawText) {
        Token tag = this.getMetaTag();
        if (tag == null) {
            return null;
        }
        StringBuffer out = new StringBuffer();
        this.getTagContent(rawText, tag.getName(), tag.getIndex(), out);
        return out.length() >= 1 ? out.toString() : null;
    }

    /**
     * @return the first indexed tag among DESCRIPT and IN (in that order), or
     *         {@code null} when neither is present
     */
    protected Token getMetaTag() {
        return this.findFirstTag(META_TAGS);
    }

    /**
     * Extracts the body by concatenating the content of every body-tag section found
     * from the first indexed occurrence onwards.
     *
     * @param rawText the raw document text
     * @return the body text when it is longer than 40 characters, otherwise
     *         {@code null}
     */
    protected String extractBody(String rawText) {
        Token tag = this.getBodyTag();
        if (tag == null) {
            return null;
        }
        StringBuffer out = new StringBuffer();
        int from = tag.getIndex();
        int next = this.getTagContent(rawText, tag.getName(), from, out);
        // Keep consuming sections while the scan position advances; a negative result
        // (no further opening tag) ends the loop.
        while (next > from) {
            from = next;
            next = this.getTagContent(rawText, tag.getName(), from, out);
        }
        return out.length() > MIN_BODY_LENGTH ? out.toString() : null;
    }

    /**
     * @return the indexed TEXT tag, or {@code null} when the document has none
     */
    protected Token getBodyTag() {
        return this.findFirstTag(new String[]{"TEXT"});
    }

    /**
     * Appends the tag-stripped content of the next {@code <tag>...</tag>} section at or
     * after {@code start} to {@code out}, separated from existing text by one space.
     *
     * @param content the raw document text
     * @param tag     the tag name without angle brackets
     * @param start   the offset at which to start searching for the opening tag
     * @param out     the buffer receiving the cleaned section text
     * @return the offset just past the closing tag; the offset just past the opening
     *         tag when no closing tag follows (nothing is appended); or a negative
     *         value when no opening tag is found
     */
    protected int getTagContent(String content, String tag, int start, StringBuffer out) {
        int open = content.indexOf("<" + tag + ">", start);
        if (open < 0) {
            return open;
        }
        int bodyStart = open + 2 + tag.length();
        int close = content.indexOf("</" + tag + ">", bodyStart);
        if (close < 0) {
            return bodyStart;
        }
        if (out.length() > 0) {
            out.append(' ');
        }
        out.append(this.removeTag(content.substring(bodyStart, close)));
        return close + 3 + tag.length();
    }

    /**
     * Returns the content of the indexed tag with the given name.
     *
     * @param rawText    the raw document text
     * @param tagName    the tag name without angle brackets
     * @param preprocess whether nested markup should be stripped via
     *                   {@link #removeTag(String)}
     * @return the tag content, or {@code null} when the tag is not indexed or is never
     *         closed
     */
    protected String getTagContent(String rawText, String tagName, boolean preprocess) {
        int pos = this.tagList.binarySearch(new Token(tagName));
        if (pos < 0) {
            return null;
        }
        return this.sliceTagContent(rawText, (Token)this.tagList.get(pos), preprocess);
    }

    /**
     * Returns the content of the given indexed tag.
     *
     * @param rawText    the raw document text
     * @param tag        the indexed tag; may be {@code null}
     * @param preprocess whether nested markup should be stripped via
     *                   {@link #removeTag(String)}
     * @return the tag content, or {@code null} when {@code tag} is {@code null} or the
     *         tag is never closed
     */
    protected String getTagContent(String rawText, Token tag, boolean preprocess) {
        if (tag == null) {
            return null;
        }
        return this.sliceTagContent(rawText, tag, preprocess);
    }

    /**
     * Strips markup from a text fragment: every {@code <...>} span is removed and each
     * text run between tags is cleaned up and normalized. Runs of 10 characters or
     * fewer are dropped entirely.
     *
     * @param content the text that may contain nested tags
     * @return the cleaned text
     */
    protected String removeTag(String content) {
        StringBuffer sb = new StringBuffer();
        int lastPos = 0;
        int open = content.indexOf('<');
        while (open >= 0) {
            if (open > lastPos) {
                sb.append(this.processTagContent(content.substring(lastPos, open)));
                sb.append(' ');
            }
            int close = content.indexOf('>', open);
            if (close < 0) {
                // Unterminated '<': the text before it has already been emitted, so
                // resume the tail at the '<' itself rather than re-emitting that text.
                lastPos = open;
                break;
            }
            lastPos = close + 1;
            open = content.indexOf('<', lastPos);
        }
        if (lastPos < content.length()) {
            sb.append(this.processTagContent(content.substring(lastPos).trim()));
        }
        return sb.toString();
    }

    /**
     * Returns the first indexed tag matching one of the given names, trying the names
     * in array order.
     */
    private Token findFirstTag(String[] names) {
        for (int i = 0; i < names.length; i++) {
            int pos = this.tagList.binarySearch(new Token(names[i]));
            if (pos >= 0) {
                return (Token)this.tagList.get(pos);
            }
        }
        return null;
    }

    /**
     * Cuts the text between the indexed opening tag and the next matching closing tag.
     * The closing tag is built from the tag's name, matching how the start offset is
     * computed.
     */
    private String sliceTagContent(String rawText, Token tag, boolean preprocess) {
        String name = tag.getName();
        // getIndex() is the offset of the opening '<' (as recorded when the tags were
        // collected); skip past "<name>".
        int start = tag.getIndex() + 2 + name.length();
        int end = rawText.indexOf("</" + name + ">", start);
        if (end < 0) {
            return null;
        }
        String tagContent = rawText.substring(start, end);
        return preprocess ? this.removeTag(tagContent) : tagContent;
    }

    /**
     * Finishes a title/abstract buffer: returns null when it is shorter than the
     * minimum heading length, otherwise appends a period if needed and returns it.
     */
    private static String closeSentence(StringBuffer text) {
        if (text.length() < MIN_HEADING_LENGTH) {
            return null;
        }
        if (SENTENCE_END.indexOf(text.charAt(text.length() - 1)) < 0) {
            text.append('.');
        }
        return text.toString();
    }

    /**
     * Cleans one run of text found between tags: drops very short runs, turns line
     * breaks into sentence breaks for long runs without any sentence boundary,
     * normalizes entities/quotes/whitespace and closes longer runs with a period.
     */
    private String processTagContent(String content) {
        if (content.length() <= MIN_FRAGMENT_LENGTH) {
            return "";
        }
        if (content.length() >= SENTENCE_WINDOW && !this.containSentence(content)) {
            content = content.replace("\n", ". ");
        }
        content = this.replacement(content);
        boolean longEnough = content.length() > MIN_SENTENCE_LENGTH;
        if (longEnough && SENTENCE_END.indexOf(content.charAt(content.length() - 1)) < 0) {
            content = content + ".";
        }
        return content;
    }

    /**
     * Decodes {@code &amp;}, converts doubled single quotes and doubled backticks to
     * double quotes, replaces line breaks with spaces and trims the result.
     */
    private String replacement(String content) {
        String text = content.replace("&amp;", "&");
        text = text.replace("''", "\"");
        text = text.replace("``", "\"");
        text = text.replace('\r', ' ');
        return text.replace('\n', ' ').trim();
    }

    /**
     * Reports whether the first occurrence of a period followed by a space, carriage
     * return or newline lies within the first 400 characters of the text.
     */
    private boolean containSentence(String content) {
        if (content == null) {
            return false;
        }
        String[] boundaries = {". ", ".\r", ".\n"};
        for (int i = 0; i < boundaries.length; i++) {
            int at = content.indexOf(boundaries[i]);
            if (at >= 0 && at <= SENTENCE_WINDOW) {
                return true;
            }
        }
        return false;
    }

    /**
     * Scans the document and indexes every opening tag. The recorded tag name is the
     * full text between {@code <} and {@code >}, so any attributes are part of the name.
     * When a name is already indexed, the existing entry's frequency is incremented
     * instead of adding a second entry.
     *
     * @param content the raw document text
     * @return the collected tags, or {@code null} when the text is malformed (for
     *         example {@code null} input, a trailing {@code <}, or a tag that is never
     *         closed with {@code >})
     */
    protected SortedArray collectTagInformation(String content) {
        try {
            SortedArray tags = new SortedArray(30);
            int start = content.indexOf('<');
            while (start >= 0) {
                if (content.charAt(start + 1) != '/') {
                    int end = content.indexOf('>', start);
                    // A missing '>' makes substring throw, which is reported below as
                    // an invalid document.
                    Token curToken = new Token(content.substring(start + 1, end), start, 1);
                    if (!tags.add(curToken)) {
                        // Presumably add() rejects duplicates and insertedPos() points
                        // at the existing entry - confirm against SortedArray.
                        curToken = (Token)tags.get(tags.insertedPos());
                        curToken.addFrequency(1);
                    }
                    start = end + 1;
                } else {
                    // Closing tag: step past '<' and keep scanning.
                    ++start;
                }
                start = content.indexOf('<', start);
            }
            return tags;
        }
        catch (Exception e) {
            System.out.println("Invalid SGM format!");
            return null;
        }
    }
}

