{"version":"1.0","workflow_uuid":"b92244fc-578c-11f1-9bc6-00163e2b0d79","workflow_title":"SentencePiece — Language-Independent Subword Tokenizer","install_contract":{"version":"1.0","installReady":false,"title":"SentencePiece — Language-Independent Subword Tokenizer","summary":"An unsupervised text tokenizer and detokenizer by Google that implements BPE and unigram language model algorithms, used as the tokenization backbone for many large language models.","assetType":"Configs","pageUrl":"https://tokrepo.com/en/workflows/asset-b92244fc","sourceUrl":"https://github.com/google/sentencepiece","intendedFor":[],"firstActions":[],"agentFirstSteps":[],"targetPaths":[],"verification":[],"startingPoints":[],"example":"","successOutcome":"","boundaries":[],"askUserIf":["the current workspace stack cannot be matched to a safe upstream template","the target path is not the project root, or an existing file should be merged instead of overwritten"]}}