#!.venv/bin/python
"""
Creates `new_words.txt` with words that failed a spellcheck.

If the `--merge` flag is given, the content of that file is merged in the
spelling dictionary instead.

Run `make spelling` to run the actual spellcheck.
"""
import argparse
import sys
from collections import Counter
from pathlib import Path
from typing import List
from typing import Optional


*_, EPILOG = __doc__.strip().split("\n")
DESCRIPTION = "\n".join(_)

BASE_PATH = Path(__file__).parent.absolute()
NEW_WORDS_FILE = BASE_PATH / "new_words.txt"
SPELLCHECK_DIR = BASE_PATH / "build/spelling"
WORDLIST_FILE = BASE_PATH / "source/dict.txt"


def get_misspelled_words(error_dir: Path) -> Counter:
    """Return counter for misspelled words form files in *error_dir*."""
    words = Counter()
    for error_file in (e for e in error_dir.glob("*.spelling")):
        lines = error_file.read_text().split("\n")
        for line in (line for l in lines if (line := l.strip())):
            try:
                word = line.split("(")[1].split(")")[0]
            except IndexError:
                print("failed to parse line: {line}", file=sys.stderr)
                continue
            words[word] += 1
    return words


def merge_wordfiles(*files: List[Path], out_file: Optional[Path] = None) -> List:
    """Combine words form *files*, return sorted results and write to *out_file* if set."""
    words = set()
    for wordfile in (f for f in files if f.exists()):
        with wordfile.open() as f:
            for line in (line for l in f if (line := l.strip())):
                words.add(line)
    words = sorted(words, key=lambda word: (word.lower(), word))
    if out_file is not None:
        out_file.write_text("\n".join(words) + "\n")
    return words


def write_errors_to_newlist(error_dir: Path, outfile: Path) -> Counter:
    """Write errors found in *error_dir* to *outfile*, sorted by hit count."""
    if not (error_dir.exists() and error_dir.is_dir()):
        raise ValueError(f"'{error_dir}' not found")
    error_counter = get_misspelled_words(error_dir)
    outfile.write_text("\n".join((t[0] for t in error_counter.most_common())))
    return error_counter


def merge_newlist_to_wordlist(newlist: Path, wordlist: Path) -> List:
    """Merge *newlist* to *wordlist* and return the results."""
    if not (newlist.exists() and newlist.is_file()):
        raise ValueError(f"'{newlist}' not found")
    return merge_wordfiles(newlist, wordlist, out_file=wordlist)


def get_args(argv=None):
    ap = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG)
    ap.add_argument(
        "--merge", action="store_true", help="merge new words to dictionary"
    )
    return ap.parse_args(argv)


def main(
    merge=False,
    newlist: Path = NEW_WORDS_FILE,
    wordlist: Path = WORDLIST_FILE,
    error_dir: Path = SPELLCHECK_DIR,
):
    """
    Write errors found in *error_dir* to *newlist*, sorted by hit count.

    Or merge *newlist* to *wordlist* instead, if *merge* is set.
    """
    if merge:
        try:
            words = merge_newlist_to_wordlist(newlist, wordlist)
            print(f"Wrote {len(words)} words to dictionary '{wordlist}'.")
        except ValueError:
            print(
                f"[ERROR] no wordlist found in '{newlist}'.\n",
                "You can run without the `--merge` flag, to create it.",
                file=sys.stderr,
            )
            return 4
    else:
        try:
            error_counter = write_errors_to_newlist(error_dir, newlist)
            print(
                f"Found {len(error_counter)} unique words in a total of "
                f"{sum(error_counter.values())} misspelled words."
            )
        except ValueError:
            print(
                f"[ERROR] no spellcheck results found in '{error_dir}'.\n",
                "You can run `make spelling` to create them.",
                file=sys.stderr,
            )
            return 3

    return 0


if __name__ == "__main__":
    args = get_args()
    sys.exit(main(**vars(args)))