#!/usr/bin/env python3
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""Re-assemble en_US-mozilla.dic after make-hunspell-dict, merging in the
suggestion exclusions (lines ending in '!') carried over from the previous
Mozilla dictionary.
make-hunspell-dict produces a UTF-8 .dic with a count line at the top
followed by sorted entries. This script:
1. Reads the entries (stripping the count line) from the new dictionary.
2. Reads the suggestion-exclusion entries (still in their munched form)
from the file produced by make-new-dict.sh, decoded as ISO-8859-1
because the upstream pipeline keeps that file in ISO-8859-1.
3. Concatenates and re-sorts both lists.
4. Writes the result back as UTF-8 with the updated count line.
"""
import argparse
import pathlib
import sys
def main(argv=None):
    """Merge suggestion-exclusion entries into a dictionary file in place.

    The first line of ``dic_file`` (its entry-count line) is discarded. The
    remaining lines are combined with the lines of ``nosug_file``, blank
    lines are dropped, and the sorted result is written back over
    ``dic_file`` beneath a recomputed count line.

    Args:
        argv: Command-line arguments without the program name. ``None``
            lets argparse fall back to ``sys.argv[1:]``.

    Returns:
        0 on success, suitable for use as the process exit status.

    Raises:
        OSError: If either file cannot be read, or ``dic_file`` cannot be
            written.
        UnicodeDecodeError: If ``dic_file`` is not valid UTF-8.
        SystemExit: Raised by argparse for invalid arguments or ``--help``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "dic_file",
        type=pathlib.Path,
        help="dictionary file (modified in place)",
    )
    parser.add_argument(
        "nosug_file",
        type=pathlib.Path,
        help="munched suggestion-exclusion list (ISO-8859-1)",
    )
    args = parser.parse_args(argv)

    # Everything after the first (count) line of the dictionary is an entry.
    new_entries = args.dic_file.read_text(encoding="utf-8").splitlines()[1:]
    nosug_entries = args.nosug_file.read_text(encoding="iso-8859-1").splitlines()

    # Blank lines are not entries: keeping them would inflate the count line
    # and, because "" sorts first, leave empty lines right below the count.
    combined = sorted(
        entry for entry in new_entries + nosug_entries if entry.strip()
    )

    # Join the count with the entries so that an empty result is written as
    # just "0\n" rather than a count followed by a stray empty line.
    output = "\n".join([str(len(combined)), *combined]) + "\n"
    args.dic_file.write_text(output, encoding="utf-8")
    return 0
# Script entry point: run the merge only when executed directly (not when
# imported), and hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())