{"version":"1.0","workflow_uuid":"2b346f1b-42ba-11f1-9bc6-00163e2b0d79","workflow_title":"Hugging Face Tokenizers — Fast Text Tokenization for ML Pipelines","install_contract":{"version":"1.0","installReady":false,"title":"Hugging Face Tokenizers — Fast Text Tokenization for ML Pipelines","summary":"Hugging Face Tokenizers is a Rust-powered tokenization library with Python bindings that implements BPE, WordPiece, Unigram, and SentencePiece tokenizers with training and encoding speeds of gigabytes per second, used as the backbone for Transformers model tokenization.","assetType":"Configs","pageUrl":"https://tokrepo.com/en/workflows/2b346f1b-42ba-11f1-9bc6-00163e2b0d79","sourceUrl":"https://github.com/huggingface/tokenizers","intendedFor":[],"firstActions":[],"agentFirstSteps":[],"targetPaths":[],"verification":[],"startingPoints":[],"example":"","successOutcome":"","boundaries":[],"askUserIf":["the current workspace stack cannot be matched to a safe upstream template","the target path is not the project root, or an existing file should be merged instead of overwritten"]}}