TypeScript tokenizer for Mistral models.
Supported models:

- `mistral-large-latest` (points to `mistral-large-2407`)
  - `mistral-large-2402`
  - `mistral-large-2407`
- `mistral-small-latest` (points to `mistral-small-2402`)
  - `mistral-small-2402`
- `open-mistral-nemo` (points to `open-mistral-nemo-2407`)
  - `open-mistral-nemo-2407`
- `codestral-latest` (points to `codestral-2405`)
  - `codestral-2405`
- `mistral-embed` (points to `mistral-embed-2312`)
  - `mistral-embed-2312`
- `open-mistral-7b` (points to `open-mistral-7b-v0.3`)
  - `open-mistral-7b-v0.1`
  - `open-mistral-7b-v0.2`
  - `open-mistral-7b-v0.3`
- `open-mixtral-8x7b` (points to `open-mixtral-8x7b-v0.1`)
  - `open-mixtral-8x7b-v0.1`
- `open-mixtral-8x22b` (points to `open-mixtral-8x22b-v0.1`)
  - `open-mixtral-8x22b-v0.1`
- `open-codestral-mamba` (points to `open-codestral-mamba-v0.1`)
  - `open-codestral-mamba-v0.1`
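The model name you pass simply selects the matching vocabulary. As a hedged sketch (the model names come from the list above, and the only calls assumed are `getTokenizerForModel` and `encode` from the usage example below), the same prompt can be encoded under several models to compare token counts:

```typescript
import { getTokenizerForModel } from 'mistral-tokenizer-ts'

// Sketch: count tokens for one prompt under a few of the models listed above.
// Assumes encode() returns an array of token ids, as in the usage example below.
const prompt = 'Hello world!'

for (const model of ['open-mistral-7b', 'open-mixtral-8x7b', 'codestral-latest'] as const) {
  const tokenizer = getTokenizerForModel(model)
  console.log(`${model}: ${tokenizer.encode(prompt).length} tokens`)
}
```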
Install the package with `npm install mistral-tokenizer-ts`, then use it as follows:
import { getTokenizerForModel } from 'mistral-tokenizer-ts'
const tokenizer = getTokenizerForModel('open-mistral-7b')
// Encode.
const encoded = tokenizer.encode('Hello world!')
// Decode.
const decoded = tokenizer.decode([1, 22557, 1526])
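Decoding the ids returned by `encode` should give back the original text. A minimal round-trip sketch building on the calls above (how special tokens such as a leading BOS id are handled is an assumption, not something stated here):

```typescript
import { getTokenizerForModel } from 'mistral-tokenizer-ts'

const tokenizer = getTokenizerForModel('open-mistral-7b')

// Encode to token ids (the ids may include special tokens such as BOS),
// then decode back to text.
const text = 'Hello world!'
const ids = tokenizer.encode(text)
const roundTripped = tokenizer.decode(ids)

console.log(ids.length)    // number of tokens produced for the prompt
console.log(roundTripped)  // expected to resemble the original text
```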
Run the tests with `npm run test`.
Credits:

- @imoneoi for the initial implementation
- @dqbd for the tiktoken JS port
- @mistralai for the Python tokenizers