
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;


/**
 * Small command-line demo that runs one sample sentence through several
 * Lucene analyzers and prints the tokens each of them produces.
 */
public class LuceneAnalyzers {

	/** Display names of the analyzers to demo, in the order they are printed. */
	private static final List<String> ANALYZER_NAMES = Arrays.asList(
			"StandardAnalyzer",
			"StandardAnalyzer (no stopwords)",
			"ClassicAnalyzer",
			"StopAnalyzer",
			"WhitespaceAnalyzer");

	/**
	 * Tokenizes {@code text} with the given analyzer and returns the terms in
	 * the order the analyzer emits them.
	 *
	 * <p>The token stream is consumed following the full TokenStream workflow
	 * ({@code reset()}, {@code incrementToken()} loop, {@code end()},
	 * {@code close()}), so the same analyzer instance can be passed to this
	 * method repeatedly. The analyzer itself is NOT closed here; that stays
	 * the caller's responsibility.
	 *
	 * @param text     the text to analyze
	 * @param analyzer the analyzer used to produce the token stream
	 * @return the term text of every token, in emission order
	 * @throws IOException if the token stream fails while being consumed
	 */
	public static List<String> analyze(String text, Analyzer analyzer) throws IOException {
		List<String> result = new ArrayList<>();
		// try-with-resources guarantees close() even if incrementToken() throws.
		try (TokenStream tokenStream = analyzer.tokenStream("text", text)) {
			CharTermAttribute attr = tokenStream.addAttribute(CharTermAttribute.class);
			tokenStream.reset();
			while (tokenStream.incrementToken()) {
				result.add(attr.toString());
			}
			// Signals end-of-stream so the stream can finalize its state.
			tokenStream.end();
		}
		return result;
	}

	/**
	 * Builds the analyzer that corresponds to one of the display names in
	 * {@link #ANALYZER_NAMES}.
	 *
	 * @param name the display name of the analyzer
	 * @return a new analyzer instance; the caller must close it
	 * @throws IllegalArgumentException if {@code name} is not a known analyzer
	 */
	private static Analyzer createAnalyzer(String name) {
		switch (name) {
			case "StandardAnalyzer":
				return new StandardAnalyzer();
			case "StandardAnalyzer (no stopwords)":
				// Built with ClassicAnalyzer's stop word set.
				return new StandardAnalyzer(ClassicAnalyzer.STOP_WORDS_SET);
			case "StopAnalyzer":
				return new StopAnalyzer(ClassicAnalyzer.STOP_WORDS_SET);
			case "WhitespaceAnalyzer":
				return new WhitespaceAnalyzer();
			case "ClassicAnalyzer":
				return new ClassicAnalyzer();
			default:
				throw new IllegalArgumentException("Unknown analyzer: " + name);
		}
	}

	/**
	 * Prints the sample text, then for each configured analyzer prints its
	 * name followed by the comma-separated tokens it produced. If analysis
	 * fails with an {@link IOException}, the stack trace and a message are
	 * printed and the JVM exits with a non-zero status.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {

		String text = "#NLP the new @OpenAI's text-generating model (https://en.wikipedia.org/wiki/GPT-3): are you scared human yet :-} ?";

		System.out.println("Original text: ");
		System.out.println();
		System.out.println(text);
		System.out.println();

		for (String analyzerName : ANALYZER_NAMES) {
			// Analyzer is Closeable; release it as soon as its tokens are printed.
			try (Analyzer analyzer = createAnalyzer(analyzerName)) {
				List<String> tokens = analyze(text, analyzer);
				System.out.println("\t " + analyzerName + ": " + String.join(",", tokens));
				System.out.println();
			} catch (IOException e) {
				e.printStackTrace();
				System.out.println("Analyzer faild with text: " + text);
				// Non-zero status so callers/scripts can detect the failure.
				System.exit(1);
			}
		}
	}

}
	
