{
  "_id": "6a3397323efcd9bda43a2a5d",
  "Package": "tok",
  "Title": "Fast Text Tokenization",
  "Version": "0.2.2.9000",
  "Authors@R": "c(\nperson(\"Tomasz\", \"Kalinowski\", , \"tomasz@posit.co\", c(\"ctb\", \"cre\")),\nperson(\"Daniel\", \"Falbel\", , \"dfalbel@gmail.com\", c(\"aut\")),\nperson(\"Regouby\", \"Christophe\", , \"christophe.regouby@free.fr\", c(\"ctb\")),\nperson(family = \"Posit\", role = c(\"cph\"))\n)",
  "Description": "Interfaces with the 'Hugging Face' tokenizers library to\nprovide implementations of today's most used tokenizers such as\nthe 'Byte-Pair Encoding' algorithm\n<https://huggingface.co/docs/tokenizers/index>. It's extremely\nfast for both training new vocabularies and tokenizing texts.",
  "License": "MIT + file LICENSE",
  "SystemRequirements": "Cargo (Rust's package manager), rustc >= 1.77.2",
  "Encoding": "UTF-8",
  "Roxygen": "list(markdown = TRUE)",
  "Config/testthat/edition": "3",
  "URL": "https://github.com/mlverse/tok",
  "BugReports": "https://github.com/mlverse/tok/issues",
  "Config/rextendr/version": "0.5.0",
  "Config/roxygen2/version": "8.0.0",
  "Config/pak/sysreqs": "libclang-dev",
  "Repository": "https://mlverse.r-universe.dev",
  "Date/Publication": "2026-05-19 13:09:10 UTC",
  "RemoteUrl": "https://github.com/mlverse/tok",
  "RemoteRef": "HEAD",
  "RemoteSha": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
  "NeedsCompilation": "yes",
  "Packaged": {
    "Date": "2026-06-18 06:45:09 UTC",
    "User": "root"
  },
  "Author": "Tomasz Kalinowski [ctb, cre],\nDaniel Falbel [aut],\nRegouby Christophe [ctb],\nPosit [cph]",
  "Maintainer": "Tomasz Kalinowski <tomasz@posit.co>",
  "MD5sum": "7d2225fe0e960e6253ea2dd89bd964fa",
  "_user": "mlverse",
  "_type": "src",
  "_file": "tok_0.2.2.9000.tar.gz",
  "_fileid": "b9483f6857d3c852e6a2892c58182c00ed03a3b5bb69027e8add0d33593f5fcf",
  "_filesize": 11766996,
  "_sha256": "b9483f6857d3c852e6a2892c58182c00ed03a3b5bb69027e8add0d33593f5fcf",
  "_created": "2026-06-18T06:45:09.000Z",
  "_published": "2026-06-18T06:58:58.845Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 82070914585,
      "time": 248,
      "config": "linux-devel-arm64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7716139659"
    },
    {
      "job": 82070914543,
      "time": 246,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7716138137"
    },
    {
      "job": 82070914557,
      "time": 252,
      "config": "linux-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7716139770"
    },
    {
      "job": 82070914500,
      "time": 240,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7716136887"
    },
    {
      "job": 82070914509,
      "time": 211,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7716216024"
    },
    {
      "job": 82070914555,
      "time": 466,
      "config": "macos-oldrel-x86_64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7716206170"
    },
    {
      "job": 82070914542,
      "time": 158,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7716164627"
    },
    {
      "job": 82070914512,
      "time": 482,
      "config": "macos-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7716268929"
    },
    {
      "job": 82070237199,
      "time": 281,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7716065782"
    },
    {
      "job": 82070914471,
      "time": 198,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "FAIL",
      "artifact": ""
    },
    {
      "job": 82070914551,
      "time": 300,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7716154365"
    },
    {
      "job": 82070914546,
      "time": 335,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7716164740"
    },
    {
      "job": 82070914566,
      "time": 312,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7716158003"
    }
  ],
  "_buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/mlverse/tok",
  "_commit": {
    "id": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
    "author": "Tomasz Kalinowski <kalinowskit@gmail.com>",
    "committer": "Tomasz Kalinowski <kalinowskit@gmail.com>",
    "message": "update `cran-comments.md`\n",
    "time": 1779196150
  },
  "_maintainer": {
    "name": "Tomasz Kalinowski",
    "email": "tomasz@posit.co",
    "login": "t-kalinowski",
    "mastodon": "@t_kalinowski@fosstodon.org",
    "bluesky": "@t-kalinowski.bsky.social",
    "description": "",
    "uuid": 8462255
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 4.2.0",
      "role": "Depends"
    },
    {
      "package": "R6",
      "role": "Imports"
    },
    {
      "package": "cli",
      "role": "Imports"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 3.0.0",
      "role": "Suggests"
    },
    {
      "package": "hfhub",
      "version": ">= 0.1.1",
      "role": "Suggests"
    },
    {
      "package": "withr",
      "role": "Suggests"
    }
  ],
  "_owner": "mlverse",
  "_selfowned": true,
  "_usedby": 1,
  "_updates": [
    {
      "week": "2025-35",
      "n": 7
    },
    {
      "week": "2025-40",
      "n": 6
    },
    {
      "week": "2026-16",
      "n": 3
    },
    {
      "week": "2026-17",
      "n": 2
    },
    {
      "week": "2026-21",
      "n": 2
    }
  ],
  "_tags": [
    {
      "name": "v0.2.0",
      "date": "2025-08-26"
    },
    {
      "name": "v0.2.1",
      "date": "2025-09-30"
    },
    {
      "name": "v0.2.2",
      "date": "2026-04-21"
    }
  ],
  "_stars": 47,
  "_contributors": [
    {
      "user": "dfalbel",
      "count": 107,
      "uuid": 4706822
    },
    {
      "user": "t-kalinowski",
      "count": 8,
      "uuid": 8462255
    },
    {
      "user": "cregouby",
      "count": 1,
      "uuid": 10136115
    }
  ],
  "_userbio": {
    "uuid": 55406849,
    "type": "organization",
    "name": "mlverse",
    "followers": 196,
    "description": "Open source libraries to scale Data Science"
  },
  "_downloads": {
    "count": 80,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/tok"
  },
  "_devurl": "https://github.com/mlverse/tok",
  "_searchresults": 18,
  "_cargo": true,
  "_topics": [
    "rust",
    "cargo"
  ],
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/tok.html",
    "LICENSE",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/mlverse/tok",
  "_realowner": "mlverse",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.0",
      "date": "2023-07-06"
    },
    {
      "version": "0.1.1",
      "date": "2023-08-18"
    },
    {
      "version": "0.1.2",
      "date": "2024-06-27"
    },
    {
      "version": "0.1.3",
      "date": "2024-07-06"
    },
    {
      "version": "0.1.4",
      "date": "2024-09-04"
    },
    {
      "version": "0.2.0",
      "date": "2025-08-27"
    },
    {
      "version": "0.2.1",
      "date": "2025-10-03"
    },
    {
      "version": "0.2.2",
      "date": "2026-04-22"
    }
  ],
  "_exports": [
    "decoder_byte_level",
    "encoding",
    "model_bpe",
    "model_unigram",
    "model_wordpiece",
    "normalizer_nfc",
    "normalizer_nfkc",
    "pre_tokenizer",
    "pre_tokenizer_byte_level",
    "pre_tokenizer_whitespace",
    "processor_byte_level",
    "tok_decoder",
    "tok_model",
    "tok_normalizer",
    "tok_processor",
    "tok_trainer",
    "tokenizer",
    "trainer_bpe",
    "trainer_unigram",
    "trainer_wordpiece"
  ],
  "_help": [
    {
      "page": "decoder_byte_level",
      "title": "Byte level decoder",
      "concept": [
        "decoders"
      ],
      "topics": [
        "decoder_byte_level"
      ]
    },
    {
      "page": "encoding",
      "title": "Encoding",
      "topics": [
        "encoding"
      ]
    },
    {
      "page": "model_bpe",
      "title": "BPE model",
      "concept": [
        "model"
      ],
      "topics": [
        "model_bpe"
      ]
    },
    {
      "page": "model_unigram",
      "title": "An implementation of the Unigram algorithm",
      "concept": [
        "model"
      ],
      "topics": [
        "model_unigram"
      ]
    },
    {
      "page": "model_wordpiece",
      "title": "An implementation of the WordPiece algorithm",
      "concept": [
        "model"
      ],
      "topics": [
        "model_wordpiece"
      ]
    },
    {
      "page": "normalizer_nfc",
      "title": "NFC normalizer",
      "concept": [
        "normalizers"
      ],
      "topics": [
        "normalizer_nfc"
      ]
    },
    {
      "page": "normalizer_nfkc",
      "title": "NFKC normalizer",
      "concept": [
        "normalizers"
      ],
      "topics": [
        "normalizer_nfkc"
      ]
    },
    {
      "page": "pre_tokenizer",
      "title": "Generic class for tokenizers",
      "concept": [
        "pre_tokenizer"
      ],
      "topics": [
        "pre_tokenizer"
      ]
    },
    {
      "page": "pre_tokenizer_byte_level",
      "title": "Byte level pre tokenizer",
      "concept": [
        "pre_tokenizer"
      ],
      "topics": [
        "pre_tokenizer_byte_level"
      ]
    },
    {
      "page": "pre_tokenizer_whitespace",
      "title": "This pre-tokenizer simply splits using the following regex: \\w+|[^\\w\\s]+",
      "concept": [
        "pre_tokenizer"
      ],
      "topics": [
        "pre_tokenizer_whitespace"
      ]
    },
    {
      "page": "processor_byte_level",
      "title": "Byte Level post processor",
      "concept": [
        "processors"
      ],
      "topics": [
        "processor_byte_level"
      ]
    },
    {
      "page": "tok_decoder",
      "title": "Generic class for decoders",
      "concept": [
        "decoders"
      ],
      "topics": [
        "tok_decoder"
      ]
    },
    {
      "page": "tok_model",
      "title": "Generic class for tokenization models",
      "concept": [
        "model"
      ],
      "topics": [
        "tok_model"
      ]
    },
    {
      "page": "tok_normalizer",
      "title": "Generic class for normalizers",
      "concept": [
        "normalizers"
      ],
      "topics": [
        "tok_normalizer"
      ]
    },
    {
      "page": "tok_processor",
      "title": "Generic class for processors",
      "concept": [
        "processors"
      ],
      "topics": [
        "tok_processor"
      ]
    },
    {
      "page": "tok_trainer",
      "title": "Generic training class",
      "concept": [
        "trainer"
      ],
      "topics": [
        "tok_trainer"
      ]
    },
    {
      "page": "tokenizer",
      "title": "Tokenizer",
      "topics": [
        "tokenizer"
      ]
    },
    {
      "page": "trainer_bpe",
      "title": "BPE trainer",
      "concept": [
        "trainer"
      ],
      "topics": [
        "trainer_bpe"
      ]
    },
    {
      "page": "trainer_unigram",
      "title": "Unigram tokenizer trainer",
      "concept": [
        "trainer"
      ],
      "topics": [
        "trainer_unigram"
      ]
    },
    {
      "page": "trainer_wordpiece",
      "title": "WordPiece tokenizer trainer",
      "concept": [
        "trainer"
      ],
      "topics": [
        "trainer_wordpiece"
      ]
    }
  ],
  "_readme": "https://github.com/mlverse/tok/raw/HEAD/README.md",
  "_rundeps": [
    "cli",
    "R6"
  ],
  "_score": 6.103461622094705,
  "_indexed": true,
  "_nocasepkg": "tok",
  "_universes": [
    "mlverse",
    "t-kalinowski"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:49:14.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "ff4f22cb5e23d39f998af646c8fdb6a0e63a419313feb69711f93a384f30c0cf",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:49:16.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "c8f1e420ad02b434cfdd7bcfadd376795f1eb8856700c1742ce93856b1a728cb",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:49:14.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "56b3a372666f699bebd121fc76c292032a0d371baad2dce6e028531948244d90",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:49:16.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "841fefdd6bbaf2f122f9fe8dfa9c548e9828e761bf2f1369564ea257edb4c1af",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:53:19.000Z",
      "arch": "aarch64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "fe9912ef4e573c8656fcf2c34ca28024c58991de4ee0324f24acc19c4b585b18",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:50:12.000Z",
      "arch": "x86_64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "1e3e743092666e3aa97a9978fc01182e413a4a34693eae57ebc43e8c39b14be4",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:50:59.000Z",
      "arch": "aarch64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "c706171c35ffb4ca318a604627a575aeaaee684bb216a4b7262690365c008a10",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:54:45.000Z",
      "arch": "x86_64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "d7c8518ad33e01c9103e87c0821b19aad8189a2511adb7cb0576eb10045e1313",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:48:01.000Z",
      "arch": "x86_64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "7cf811c4df1f7079c705a3758bd68bdd762d42929bddc078e0be41b78d9b588e",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:48:00.000Z",
      "arch": "x86_64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "f8a686de32406f8a9cfc664c09b20801376dc9b05998908e9dee160af3b67959",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.2.2.9000",
      "date": "2026-06-18T06:48:12.000Z",
      "arch": "x86_64",
      "commit": "f925ad65e356dc6d295b633391aa00eae9dad8fe",
      "fileid": "e0608ccce57a6ef216d190ee74613122f9421712d758b8b66664a1511684c92d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlverse/actions/runs/27741698020"
    }
  ]
}