package de.huberlin.ire;

import com.google.gson.FieldNamingPolicy;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import us.codecraft.xsoup.XPathEvaluator;
import us.codecraft.xsoup.Xsoup;

import javax.json.*;
import javax.json.stream.JsonGenerator;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/**
 * Command-line crawler that looks up apps in the Google Play Store and stores the
 * scraped information as JSON files.
 *
 * <p>The workflow is: read app names from a JSON file ({@link #readAppNames(Path)}),
 * search each name in the store and scrape the first hit's detail page
 * ({@link #crawlAppInformation(List)}), then write one pair of output files per app
 * ({@link #saveApps(List, Path)}).
 *
 * <p>The detail page is requested with {@code hl=de}; the labels matched while scraping
 * ("Bewertungen", "Installationen", "Aktualisiert", "Aktuelle Version") are therefore German.
 */
public class GooglePlayStoreSpider {

    private static final String SEARCH_URL_PATTERN = "https://play.google.com/store/search?q=%s&c=apps";

    // The href taken from the search result is appended verbatim; "&hl=de" is added with '&',
    // so the href is expected to already carry a query string.
    private static final String APP_PAGE_URL_PATTERN = "https://play.google.com%s&hl=de";

    private static final String URL_XPATH_EXPRESSION = "//div[contains(@class,'id-card-list')]" +
        "//a[@class='card-click-target'][1]/@href";

    private static final String APP_NAME_XPATH = "//h1[@itemprop='name']/span/text()";
    private static final String REVIEW_COUNT_XPATH = "//span[contains(@aria-label,'Bewertungen')]/text()";
    private static final String DESCRIPTION_XPATH = "//meta[@itemprop='description']/@content";
    private static final String EXTENDED_META_INFO_XPATH = "//div[@class='BgcNfc']";

    /**
     * Reads the app names from a JSON file containing an array of objects, each with an
     * {@code app_name} string member.
     *
     * <p>The file is decoded as UTF-8. Array elements that are not JSON objects, or that have
     * no string-valued {@code app_name}, are skipped with a message on standard output.
     *
     * @param appListJsonFile path of the JSON input file
     * @return the app names in file order; empty if the file can't be read or parsed
     */
    public List<String> readAppNames(Path appListJsonFile) {
        List<String> result = new ArrayList<>();

        // Explicit UTF-8: FileReader would fall back to the platform default charset.
        try (Reader fileReader = Files.newBufferedReader(appListJsonFile, StandardCharsets.UTF_8);
             JsonReader jsonReader = Json.createReader(fileReader)) {
            JsonArray jsonValues = jsonReader.readArray();

            for (JsonValue jsonValue : jsonValues) {
                if (!(jsonValue instanceof JsonObject)) {
                    System.out.println("Skipping non-object entry in " + appListJsonFile + ": " + jsonValue);
                    continue;
                }

                // getString with a default returns the default for a missing or non-string member.
                String appName = ((JsonObject) jsonValue).getString("app_name", null);
                if (appName == null) {
                    System.out.println("Skipping entry without app_name in " + appListJsonFile + ": " + jsonValue);
                    continue;
                }

                result.add(appName);
            }
        } catch (IOException | JsonException ex) {
            // JsonException (unchecked) covers malformed JSON and a non-array root value.
            System.out.println("Can't read input file " + appListJsonFile.toString());
            ex.printStackTrace();
        }

        return result;
    }

    /**
     * Searches the Play Store for each app name and scrapes the detail page of the first hit.
     *
     * <p>Apps for which the search yields no link, or for which a request fails, are reported
     * on standard output and left out of the result. Values that can't be found on the detail
     * page are stored as empty strings.
     *
     * @param appNames names to search for
     * @return the successfully crawled apps, in input order
     */
    public List<App> crawlAppInformation(List<String> appNames) {
        XPathEvaluator urlXPath = Xsoup.compile(URL_XPATH_EXPRESSION);
        XPathEvaluator appTitleXPath = Xsoup.compile(APP_NAME_XPATH);
        XPathEvaluator ratingsCountXPath = Xsoup.compile(REVIEW_COUNT_XPATH);
        XPathEvaluator descriptionXPath = Xsoup.compile(DESCRIPTION_XPATH);
        XPathEvaluator extendedMetaInformationXPath = Xsoup.compile(EXTENDED_META_INFO_XPATH);

        List<App> apps = new ArrayList<>();

        for (String appName : appNames) {
            System.out.println("Processing app " + appName);

            try {
                String encodedAppName = URLEncoder.encode(appName, StandardCharsets.UTF_8.name());

                String searchUrl = String.format(SEARCH_URL_PATTERN, encodedAppName);
                System.out.println("\tSearch for app via " + searchUrl);

                Document searchResultDocument = Jsoup.connect(searchUrl).get();
                String appUrl = urlXPath.evaluate(searchResultDocument).get();
                if (appUrl == null || appUrl.isEmpty()) {
                    // Without this check the next request would go to ".../null&hl=de".
                    System.out.println("\tNo search result found for app " + appName);
                    continue;
                }
                System.out.println("\tFound app url " + appUrl);

                String appPageUrl = String.format(APP_PAGE_URL_PATTERN, appUrl);
                System.out.println("\tRetrieving app page " + appPageUrl);
                Document appPageDocument = Jsoup.connect(appPageUrl).get();

                // Missing values become "" so that later JSON serialization never sees null.
                String title = nullToEmpty(appTitleXPath.evaluate(appPageDocument).get());
                String reviewCount = nullToEmpty(ratingsCountXPath.evaluate(appPageDocument).get());
                String description = nullToEmpty(descriptionXPath.evaluate(appPageDocument).get());

                String installations = "";
                String lastUpdate = "";
                String currentVersion = "";

                Elements labels = extendedMetaInformationXPath.evaluate(appPageDocument).getElements();
                for (Element label : labels) {
                    String labelText = label.text();

                    if (labelText.contains("Installationen")) {
                        installations = readMetaValue(label);
                    } else if (labelText.contains("Aktualisiert")) {
                        lastUpdate = readMetaValue(label);
                    } else if (labelText.contains("Aktuelle Version")) {
                        currentVersion = readMetaValue(label);
                    }
                }

                App app = new App(title, appUrl, reviewCount, description, installations, lastUpdate, currentVersion);
                System.out.println("\tFound the following information: " + app.toString());

                apps.add(app);

            } catch (UnsupportedEncodingException ex) {
                System.out.println("Can't encode app name " + appName);
                ex.printStackTrace();
            } catch (IOException ex) {
                System.out.println("Exception while processing app " + appName);
                ex.printStackTrace();
            }
        }

        return apps;
    }

    /**
     * Returns the text of the second child of the label's parent element, which is where the
     * value belonging to a meta-information label is read from.
     *
     * @param label the label element that matched one of the known captions
     * @return the value text, or an empty string if the expected structure is not present
     */
    private static String readMetaValue(Element label) {
        Element container = label.parent();
        if (container == null || container.children().size() < 2) {
            return "";
        }
        return container.child(1).text();
    }

    /** Maps {@code null} to the empty string and returns every other value unchanged. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /**
     * Writes two files per app into the output folder, named by the app's list index:
     * {@code <i>.json} (built with the javax.json API) and {@code <i>.gson} (built with Gson).
     * Both contain a JSON array holding the single app.
     *
     * <p>A failure while writing one app is reported on standard output and does not stop
     * the remaining apps from being written.
     *
     * @param apps         the apps to serialize
     * @param outputFolder existing folder that receives the files
     */
    public void saveApps(List<App> apps, Path outputFolder) {
        // The Gson configuration is identical for every app, so build it once.
        // NOTE(review): Gson emits dash-separated names here while the javax.json output
        // below uses underscores - confirm the difference is intended.
        GsonBuilder gsonBuilder = new GsonBuilder();
        gsonBuilder.setFieldNamingStrategy(FieldNamingPolicy.LOWER_CASE_WITH_DASHES);
        Gson gson = gsonBuilder.create();

        for (int i = 0; i < apps.size(); i++) {
            Path outputJsonFile = outputFolder.resolve(i + ".json");

            // Normal json file, however serialized with google gson!
            Path outputGsonFile = outputFolder.resolve(i + ".gson");

            App app = apps.get(i);

            try (BufferedWriter jsonFileWriter = Files.newBufferedWriter(outputJsonFile);
                 BufferedWriter gsonFileWriter = Files.newBufferedWriter(outputGsonFile);
                 JsonGenerator writer = Json.createGenerator(jsonFileWriter)) {

                // Json serialization using the default java api!
                JsonObjectBuilder objectBuilder = Json.createObjectBuilder();
                objectBuilder.add("app_name", app.getTitle());
                objectBuilder.add("url", app.getUrl());
                objectBuilder.add("description", app.getDescription());
                objectBuilder.add("review_count", app.getReviewCount());
                objectBuilder.add("last_update", app.getLastUpdate());
                objectBuilder.add("installations", app.getInstallations());
                objectBuilder.add("current_version", app.getCurrentVersion());

                JsonObject jsonObject = objectBuilder.build();

                writer.writeStartArray();
                writer.write(jsonObject);
                writer.writeEnd();

                // Json serialization using the gson library
                gson.toJson(new App[]{app}, gsonFileWriter);

            } catch (IOException | JsonException ex) {
                // JsonException is what the javax.json generator raises for I/O problems.
                System.out.println("Error while writing information for app " + app);
                ex.printStackTrace();
            }
        }
    }

    /**
     * Program entry point. Expects exactly two arguments: the JSON file listing the apps and
     * the output directory, which is created if necessary. Exits with status -1 on wrong
     * usage or if the output directory can't be created.
     *
     * @param arguments command-line arguments: {@code <app.json> <output-dir>}
     */
    public static void main(String[] arguments) {
        if (arguments.length != 2) {
            System.out.println("Please call the program with: java -jar GooglePlayStoreSpider <app.json> <output-dir>");
            System.exit(-1);
        }

        Path appListFile = Paths.get(arguments[0]);
        Path outputFolder = Paths.get(arguments[1]);

        try {
            Files.createDirectories(outputFolder);
        } catch (IOException ex) {
            System.out.println("Can't create output folder " + outputFolder);
            System.exit(-1);
        }

        GooglePlayStoreSpider spider = new GooglePlayStoreSpider();

        List<String> appNames = spider.readAppNames(appListFile);
        List<App> crawledApps = spider.crawlAppInformation(appNames);
        spider.saveApps(crawledApps, outputFolder);
    }
}
