.gitignore
.pylintrc
LICENSE
README.md
pyproject.toml
requirements.txt
setup.cfg
setup.py
.github/workflows/ci.yml
bin/opusfilter
bin/opusfilter-cmd
bin/opusfilter-diagram
bin/opusfilter-duplicates
bin/opusfilter-scores
bin/opusfilter-test
docs/CHANGELOG.md
docs/CONTRIBUTING.md
docs/references.bib
example_configs/README.md
example_configs/paracrawl_fi-en/create_ce_sets.yaml
example_configs/paracrawl_fi-en/create_domain_sets.yaml
example_configs/paracrawl_fi-en/create_roc_auc_sets.yaml
example_configs/paracrawl_fi-en/devset_100_1_labels.jsonl
example_configs/paracrawl_fi-en/devset_100_2_labels.jsonl
example_configs/paracrawl_fi-en/prepare_data.yaml
opusfilter/__init__.py
opusfilter/classifier.py
opusfilter/filters.py
opusfilter/lm.py
opusfilter/opusfilter.py
opusfilter/pipeline.py
opusfilter/preprocessors.py
opusfilter/segment_hash.py
opusfilter/tokenization.py
opusfilter/util.py
opusfilter/word_alignment.py
opusfilter.egg-info/PKG-INFO
opusfilter.egg-info/SOURCES.txt
opusfilter.egg-info/dependency_links.txt
opusfilter.egg-info/requires.txt
opusfilter.egg-info/top_level.txt
tests/test_classifier.py
tests/test_filter_pipeline.py
tests/test_filters.py
tests/test_lm_filter.py
tests/test_opus_filter.py
tests/test_preprocessors.py
tests/test_segment_hash.py
tests/test_tokenization.py
tests/test_wordalign_filter.py
tests/test_yaml.py