/**
 * Authors: Frederik Leyvraz, David Degenhardt
 * License: GNU General Public License v3.0 only
 * Version: 1.0.0
 */

package ch.bfh.ti.latexindexer;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * {@link Parser} implementation that converts a LaTeX file to plain text by
 * invoking the external {@code pandoc} executable and counts how often each
 * word occurs in the converted text.
 *
 * <p>Tokenisation is ASCII-based: a line is split on runs of characters that
 * are not in {@code [a-zA-Z0-9_]}, and any leading or trailing characters that
 * are not ASCII letters are stripped from each token. Words are counted
 * case-sensitively.
 */
public class PandocParser implements Parser {
    /** Name of the Pandoc executable, resolved through the system search path. */
    private static final String PANDOC = "pandoc";
    /** Pandoc option selecting plain text as the output format. */
    private static final String PANDOC_OPTIONS = "--to=plain";
    /** Separator between tokens: one or more non-word characters. */
    private static final Pattern WORD_SEPARATOR = Pattern.compile("\\W+");
    /** Matches non-letter characters at the start or at the end of a token. */
    private static final Pattern LEADING_TRAILING_SYMBOLS = Pattern.compile("^[^a-zA-Z]+|[^a-zA-Z]+$");

    private final String latexFilePath;

    /**
     * Constructor
     * @param latexFile The path to the latex file that should be parsed.
     */
    public PandocParser(String latexFile) {
        this.latexFilePath = latexFile;
    }

    /**
     * Runs {@code pandoc -v} and echoes its standard output to {@code System.err}.
     * @throws IOException If the parser cannot be started (e.g. it is not present
     *                     on the system) or its output cannot be read.
     */
    public void checkVersion() throws IOException {
        Process pandoc = new ProcessBuilder(PANDOC, "-v").start();
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader input = new BufferedReader(
                new InputStreamReader(pandoc.getInputStream(), StandardCharsets.UTF_8))) {
            String output;
            while ((output = input.readLine()) != null) {
                System.err.println(output);
            }
        }
    }

    /**
     * Converts the LaTeX file to plain text with Pandoc and counts the words in
     * the result.
     *
     * <p>Pandoc is started in the directory of the LaTeX file. Its standard error
     * is passed straight through to the standard error of this JVM. A non-zero
     * exit code is reported on {@code System.err}; the words collected up to that
     * point are still returned.
     *
     * @return One {@code Word} per distinct word together with its number of
     *         occurrences, in no particular order.
     * @throws IOException If Pandoc cannot be started, its output cannot be read,
     *                     or the thread is interrupted while waiting for Pandoc.
     */
    @Override
    public List<Word> parseDocument() throws IOException {
        Map<String, Integer> words = new HashMap<>();
        checkVersion();

        File latexFile = new File(latexFilePath);
        ProcessBuilder processBuilder = new ProcessBuilder(PANDOC, PANDOC_OPTIONS, latexFile.getAbsolutePath());
        // A null parent (bare file name) makes the process use the current working directory.
        processBuilder.directory(latexFile.getParentFile());
        // Let Pandoc write diagnostics directly to our stderr. Reading stdout to
        // the end before touching stderr could block forever once Pandoc fills
        // the stderr pipe buffer.
        processBuilder.redirectError(ProcessBuilder.Redirect.INHERIT);

        Process process = processBuilder.start();
        // Pandoc always emits UTF-8, independent of the platform default charset.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                countWords(line, words);
            }
        }

        try {
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                System.err.println("[ERROR] Pandoc exited with code: " + exitCode);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            throw new IOException("Error running Pandoc", e);
        }

        List<Word> result = new ArrayList<>(words.size());
        for (Map.Entry<String, Integer> entry : words.entrySet()) {
            result.add(new Word(entry.getKey(), entry.getValue()));
        }
        return result;
    }

    /** Splits one line of text into words and adds their occurrences to {@code counts}. */
    private static void countWords(String line, Map<String, Integer> counts) {
        for (String token : WORD_SEPARATOR.split(line)) {
            String word = LEADING_TRAILING_SYMBOLS.matcher(token).replaceAll("");
            if (!word.isEmpty()) {
                counts.merge(word, 1, Integer::sum);
            }
        }
    }
}
