Metadata-Version: 2.1
Name: soco-tokenizer
Version: 1.2
Summary: Fast tokenizer
Home-page: https://www.soco.ai
Author: tianchez
License: UNKNOWN
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: License :: Free for non-commercial use
Classifier: Operating System :: OS Independent
Description-Content-Type: text/markdown
Requires-Dist: numpy (>=1.15.3)
Requires-Dist: boto3 (>=1.9.46)
Requires-Dist: oss2 (>=2.6.0)
Requires-Dist: tqdm (>=4.45.0)
Requires-Dist: nltk (>=3.5)
Requires-Dist: jieba-fast (>=0.53)
Requires-Dist: spacy (>=2.2.4)
Requires-Dist: requests (>=2.23.0)
Requires-Dist: tokenizers (>=0.7.0)

# Tokenizer
Tokenize text in different languages, fast.


## Build and upload a package
    python setup.py bdist_wheel
    twine upload dist/*

## Use Locally
    x1 = '<a>刘强东是一个著名企业家。</a> 他创建了京东。'
    t = EncoderLoader.load_tokenizer('bert-base-chinese-zh_v4-10K')
    print(t.tokenize(x1, mode='char'))
    print(t.tokenize(x1, mode='word'))
    print(t.tokenize(x1, mode='all'))

