/**
 * Authors: Frederik Leyvraz, David Degenhardt
 * License: GNU General Public License v3.0 only
 * Version: 1.0.0
 */

package ch.bfh.ti.latexindexer;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;

/**
 * {@link Parser} implementation that obtains the words of a LaTeX document by running the
 * external {@code detex} command-line tool and reading its output line by line.
 */
public class DetexParser implements Parser {
    /** Name of the external executable that is launched. */
    private static final String DETEX = "detex";

    /** Option passed to detex when parsing; each output line is then treated as one word. */
    private static final String DETEX_OPTIONS = "-w";

    /**
     * Matches a run of non-ASCII-letter characters at the start or at the end of a token.
     * Compiled once instead of on every line.
     * NOTE(review): because only a-z/A-Z count as letters, accented or other non-ASCII letters
     * at a word boundary are stripped as well - confirm this is intended.
     */
    private static final Pattern LEADING_TRAILING_SYMBOLS = Pattern.compile("^[^a-zA-Z]+|[^a-zA-Z]+$");

    private final String latexFilePath;

    /**
     * Constructor
     * @param latexFile The path to the latex file that should be parsed.
     */
    public DetexParser(String latexFile) {
        this.latexFilePath = latexFile;
    }

    /**
     * Runs {@code detex -v} and forwards everything it writes to standard output to
     * {@code System.err}.
     * @throws IOException If the parser cannot be started (e.g. it is not present on the system)
     *                     or its output cannot be read.
     */
    public void checkVersion() throws IOException {
        Process detex = new ProcessBuilder(DETEX, "-v").start();
        try (BufferedReader input = new BufferedReader(new InputStreamReader(detex.getInputStream()))) {
            String output;
            while ((output = input.readLine()) != null) {
                System.err.println(output);
            }
        } finally {
            // Release the child process even when reading fails; no effect if it already exited.
            detex.destroy();
        }
    }

    /**
     * Runs detex on the configured LaTeX file and collects the distinct words it prints.
     * Each output line is trimmed, stripped of leading/trailing non-letter characters and
     * wrapped in a {@link Word}; tokens that end up empty are skipped. The result is sorted
     * with {@link Word.AlphabeticalComparator}.
     * @return The distinct words of the document in the comparator's order.
     * @throws IOException If detex cannot be started or its output cannot be read.
     */
    @Override
    public List<Word> parseDocument() throws IOException {
        List<Word> words = new ArrayList<>();

        checkVersion();
        File latexFile = new File(latexFilePath);
        ProcessBuilder processBuilder = new ProcessBuilder(DETEX, DETEX_OPTIONS, latexFile.getAbsolutePath());

        // Run detex from the document's own directory.
        processBuilder.directory(latexFile.getParentFile());
        // Merge stderr into stdout so a single reader drains all output of the child process.
        processBuilder.redirectErrorStream(true);
        Process process = processBuilder.start();

        try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String token = LEADING_TRAILING_SYMBOLS.matcher(line.trim()).replaceAll("");
                if (token.isEmpty()) {
                    // Blank lines and lines consisting only of symbols carry no word.
                    continue;
                }

                Word word = new Word(token);
                // De-duplication relies on Word.equals; kept as a list lookup because Word's
                // hashCode contract is not visible from this class.
                if (!words.contains(word)) {
                    words.add(word);
                }
            }
        } finally {
            // Release the child process even when reading fails; no effect if it already exited.
            process.destroy();
        }

        words.sort(new Word.AlphabeticalComparator());
        return words;
    }
}
