{
  "_id": "6a1f18c0b401979e7341edc9",
  "Package": "textdata",
  "Title": "Download and Load Various Text Datasets",
  "Version": "0.4.5.9000",
  "Authors@R": "c(\nperson(\"Emil\", \"Hvitfeldt\", , \"emilhhvitfeldt@gmail.com\", role = c(\"aut\", \"cre\"),\ncomment = c(ORCID = \"0000-0002-0679-1945\")),\nperson(\"Julia\", \"Silge\", , \"julia.silge@gmail.com\", role = \"ctb\",\ncomment = c(ORCID = \"0000-0002-3671-836X\"))\n)",
  "Description": "Provides a framework to download, parse, and store text\ndatasets on the disk and load them when needed. Includes\nvarious sentiment lexicons and labeled text data sets for\nclassification and analysis.",
  "License": "MIT + file LICENSE",
  "URL": "https://emilhvitfeldt.github.io/textdata/,\nhttps://github.com/EmilHvitfeldt/textdata",
  "BugReports": "https://github.com/EmilHvitfeldt/textdata/issues",
  "VignetteBuilder": "knitr",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.1.9000",
  "Collate": "'cache_info.R' 'dataset_ag_news.R' 'dataset_dbpedia.R'\n'dataset_imdb.R' 'dataset_sentence_polarity.R' 'dataset_trec.R'\n'embedding_glove.R' 'lexicon_nrc_vad.R' 'lexicon_nrc_eil.R'\n'lexicon_nrc.R' 'lexicon_bing.R' 'lexicon_loughran.R'\n'lexicon_afinn.R' 'download_functions.R' 'info.R'\n'load_dataset.R' 'printer.R' 'process_functions.R'\n'textdata-package.R'",
  "Config/pak/sysreqs": "cmake make libuv1-dev libx11-dev",
  "Repository": "https://emilhvitfeldt.r-universe.dev",
  "Date/Publication": "2024-05-28 22:00:24 UTC",
  "RemoteUrl": "https://github.com/emilhvitfeldt/textdata",
  "RemoteRef": "HEAD",
  "RemoteSha": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-17 05:36:04 UTC",
    "User": "root"
  },
  "Author": "Emil Hvitfeldt [aut, cre] (ORCID:\n<https://orcid.org/0000-0002-0679-1945>),\nJulia Silge [ctb] (ORCID: <https://orcid.org/0000-0002-3671-836X>)",
  "Maintainer": "Emil Hvitfeldt <emilhhvitfeldt@gmail.com>",
  "MD5sum": "7a398e203f59164de3bc69b3a59285e4",
  "_user": "emilhvitfeldt",
  "_type": "src",
  "_file": "textdata_0.4.5.9000.tar.gz",
  "_fileid": "7da8d4ec58c9180f1f8d1c1e5deb720691e3f130bfa24635bfe338851c63f448",
  "_filesize": 4776216,
  "_sha256": "7da8d4ec58c9180f1f8d1c1e5deb720691e3f130bfa24635bfe338851c63f448",
  "_created": "2026-05-17T05:36:04.000Z",
  "_published": "2026-06-02T17:54:08.848Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79136804262,
      "time": 135,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7039673610"
    },
    {
      "job": 79136804242,
      "time": 136,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7039674009"
    },
    {
      "job": 79136804307,
      "time": 78,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7039667827"
    },
    {
      "job": 79136804423,
      "time": 104,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7039670558"
    },
    {
      "job": 79136803275,
      "time": 184,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7039659808"
    },
    {
      "job": 79136803377,
      "time": 112,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7365687393"
    },
    {
      "job": 79136804013,
      "time": 86,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7039668689"
    },
    {
      "job": 79136804709,
      "time": 69,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7039666899"
    },
    {
      "job": 79136804107,
      "time": 76,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7039667665"
    }
  ],
  "_buildurl": "https://github.com/r-universe/emilhvitfeldt/actions/runs/25982468383",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/emilhvitfeldt/textdata",
  "_commit": {
    "id": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
    "author": "Emil Hvitfeldt <emilhhvitfeldt@gmail.com>",
    "committer": "GitHub <noreply@github.com>",
    "message": "Merge pull request #58 from EmilHvitfeldt/PC-0.4.5\n\nRelease Candidate 0.4.5",
    "time": 1716933624
  },
  "_maintainer": {
    "name": "Emil Hvitfeldt",
    "email": "emilhhvitfeldt@gmail.com",
    "login": "emilhvitfeldt",
    "bluesky": "@emilhvitfeldt.bsky.social",
    "description": "All things @tidymodels",
    "uuid": 14034784,
    "orcid": "0000-0002-0679-1945"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "fs",
      "role": "Imports"
    },
    {
      "package": "rappdirs",
      "role": "Imports"
    },
    {
      "package": "readr",
      "role": "Imports"
    },
    {
      "package": "tibble",
      "role": "Imports"
    },
    {
      "package": "covr",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    },
    {
      "package": "testthat",
      "version": ">= 2.1.0",
      "role": "Suggests"
    }
  ],
  "_owner": "emilhvitfeldt",
  "_selfowned": true,
  "_usedby": 3,
  "_updates": [],
  "_tags": [],
  "_topics": [
    "text-datasets"
  ],
  "_stars": 78,
  "_contributors": [
    {
      "user": "emilhvitfeldt",
      "count": 134,
      "uuid": 14034784
    },
    {
      "user": "juliasilge",
      "count": 5,
      "uuid": 12505835
    },
    {
      "user": "ellisvalentiner",
      "count": 1,
      "uuid": 9932219
    },
    {
      "user": "jmclawson",
      "count": 1,
      "uuid": 3894072
    },
    {
      "user": "jonthegeek",
      "count": 1,
      "uuid": 33983824
    },
    {
      "user": "olivroy",
      "count": 1,
      "uuid": 52606734
    }
  ],
  "_userbio": {
    "uuid": 14034784,
    "type": "user",
    "name": "Emil Hvitfeldt",
    "description": "All things @tidymodels"
  },
  "_downloads": {
    "count": 9059,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/textdata"
  },
  "_devurl": "https://github.com/emilhvitfeldt/textdata",
  "_pkgdown": "https://emilhvitfeldt.github.io/textdata/",
  "_searchresults": 1548,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/textdata.html",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/emilhvitfeldt/textdata",
  "_realowner": "emilhvitfeldt",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1.0",
      "date": "2019-06-12"
    },
    {
      "version": "0.2.0",
      "date": "2019-07-22"
    },
    {
      "version": "0.3.0",
      "date": "2019-08-28"
    },
    {
      "version": "0.4.0",
      "date": "2020-03-06"
    },
    {
      "version": "0.4.1",
      "date": "2020-05-04"
    },
    {
      "version": "0.4.2",
      "date": "2022-05-02"
    },
    {
      "version": "0.4.3",
      "date": "2022-08-15"
    },
    {
      "version": "0.4.4",
      "date": "2022-09-02"
    },
    {
      "version": "0.4.5",
      "date": "2024-05-28"
    }
  ],
  "_exports": [
    "cache_info",
    "catalogue",
    "dataset_ag_news",
    "dataset_dbpedia",
    "dataset_imdb",
    "dataset_sentence_polarity",
    "dataset_trec",
    "embedding_glove27b",
    "embedding_glove42b",
    "embedding_glove6b",
    "embedding_glove840b",
    "lexicon_afinn",
    "lexicon_bing",
    "lexicon_loughran",
    "lexicon_nrc",
    "lexicon_nrc_eil",
    "lexicon_nrc_vad",
    "load_dataset"
  ],
  "_help": [
    {
      "page": "cache_info",
      "title": "List folders and their sizes in cache",
      "topics": [
        "cache_info"
      ]
    },
    {
      "page": "catalogue",
      "title": "Catalogue of all available data sources",
      "topics": [
        "catalogue"
      ]
    },
    {
      "page": "dataset_ag_news",
      "title": "AG's News Topic Classification Dataset",
      "concept": [
        "topic"
      ],
      "topics": [
        "dataset_ag_news"
      ]
    },
    {
      "page": "dataset_dbpedia",
      "title": "DBpedia Ontology Dataset",
      "concept": [
        "topic"
      ],
      "topics": [
        "dataset_dbpedia"
      ]
    },
    {
      "page": "dataset_imdb",
      "title": "IMDB Large Movie Review Dataset",
      "concept": [
        "topic sentiment"
      ],
      "topics": [
        "dataset_imdb"
      ]
    },
    {
      "page": "dataset_sentence_polarity",
      "title": "v1.0 sentence polarity dataset",
      "concept": [
        "sentiment"
      ],
      "topics": [
        "dataset_sentence_polarity"
      ]
    },
    {
      "page": "dataset_trec",
      "title": "TREC dataset",
      "concept": [
        "topic"
      ],
      "topics": [
        "dataset_trec"
      ]
    },
    {
      "page": "embedding_glove",
      "title": "Global Vectors for Word Representation",
      "concept": [
        "embeddings"
      ],
      "topics": [
        "embedding_glove",
        "embedding_glove27b",
        "embedding_glove42b",
        "embedding_glove6b",
        "embedding_glove840b"
      ]
    },
    {
      "page": "lexicon_afinn",
      "title": "AFINN-111 dataset",
      "concept": [
        "lexicon"
      ],
      "topics": [
        "lexicon_afinn"
      ]
    },
    {
      "page": "lexicon_bing",
      "title": "Bing sentiment lexicon",
      "concept": [
        "lexicon"
      ],
      "topics": [
        "lexicon_bing"
      ]
    },
    {
      "page": "lexicon_loughran",
      "title": "Loughran-McDonald sentiment lexicon",
      "concept": [
        "lexicon"
      ],
      "topics": [
        "lexicon_loughran"
      ]
    },
    {
      "page": "lexicon_nrc",
      "title": "NRC word-emotion association lexicon",
      "concept": [
        "lexicon"
      ],
      "topics": [
        "lexicon_nrc"
      ]
    },
    {
      "page": "lexicon_nrc_eil",
      "title": "NRC Emotion Intensity Lexicon (aka Affect Intensity Lexicon) v0.5",
      "concept": [
        "lexicon"
      ],
      "topics": [
        "lexicon_nrc_eil"
      ]
    },
    {
      "page": "lexicon_nrc_vad",
      "title": "The NRC Valence, Arousal, and Dominance Lexicon",
      "concept": [
        "lexicon"
      ],
      "topics": [
        "lexicon_nrc_vad"
      ]
    }
  ],
  "_pkglogo": "https://github.com/emilhvitfeldt/textdata/raw/HEAD/man/figures/logo.png",
  "_readme": "https://github.com/emilhvitfeldt/textdata/raw/HEAD/README.md",
  "_rundeps": [
    "bit",
    "bit64",
    "cli",
    "clipr",
    "cpp11",
    "crayon",
    "fs",
    "glue",
    "hms",
    "lifecycle",
    "magrittr",
    "pillar",
    "pkgconfig",
    "prettyunits",
    "progress",
    "R6",
    "rappdirs",
    "readr",
    "rlang",
    "tibble",
    "tidyselect",
    "tzdb",
    "utf8",
    "vctrs",
    "vroom",
    "withr"
  ],
  "_vignettes": [
    {
      "source": "How-to-add-a-data-set.Rmd",
      "filename": "How-to-add-a-data-set.html",
      "title": "How to add a data set",
      "engine": "knitr::rmarkdown",
      "headings": [
        "Guidelines for textdata datasets",
        "Classification datasets"
      ],
      "created": "2019-06-08 22:16:31",
      "modified": "2022-05-02 16:30:06",
      "commits": 8
    }
  ],
  "_score": 10.391128336806617,
  "_indexed": true,
  "_nocasepkg": "textdata",
  "_universes": [
    "emilhvitfeldt"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "0.4.5.9000",
      "date": "2026-05-17T05:38:16.000Z",
      "distro": "noble",
      "commit": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
      "fileid": "1c624413cde5e20957657efeca2572db5533990c37c76f3d69037bddacbb4ef3",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/emilhvitfeldt/actions/runs/25982468383"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "0.4.5.9000",
      "date": "2026-05-17T05:38:16.000Z",
      "distro": "noble",
      "commit": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
      "fileid": "0e394b973b3968bd6bed6430e8e91f5a288c8dee7e3dede2995c057ca553cf86",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/emilhvitfeldt/actions/runs/25982468383"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "0.4.5.9000",
      "date": "2026-05-17T05:37:27.000Z",
      "commit": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
      "fileid": "93852c491341395a9fe24adbf85d665a705d235a0bf7dce0c8202b53d39c86dd",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/emilhvitfeldt/actions/runs/25982468383"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "0.4.5.9000",
      "date": "2026-05-17T05:37:48.000Z",
      "commit": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
      "fileid": "f7f2b492a35fb470b54cb3fa06ff2f8195894e0ba9c933ab491e41ef9b6a0fdd",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/emilhvitfeldt/actions/runs/25982468383"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "0.4.5.9000",
      "date": "2026-05-17T05:37:19.000Z",
      "commit": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
      "fileid": "7bfdb7574b038072b278255d383f4b876b42589b671a24922e1ec5ef5283e293",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/emilhvitfeldt/actions/runs/25982468383"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "0.4.5.9000",
      "date": "2026-05-17T05:37:06.000Z",
      "commit": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
      "fileid": "ff1113e7926e4c1a55d93fff670bf8ad7156089e83241e3bb355d7419ae5111f",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/emilhvitfeldt/actions/runs/25982468383"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "0.4.5.9000",
      "date": "2026-05-17T05:37:12.000Z",
      "commit": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
      "fileid": "8f21c62725ef027c81a794c3f525ff691cb4f99d93bf8bff3bd04dea8b6658fa",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/emilhvitfeldt/actions/runs/25982468383"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "0.4.5.9000",
      "date": "2026-06-02T17:53:51.000Z",
      "commit": "7a99a97b4e7f30927bc5509d5dfaafd2aa8b58d6",
      "fileid": "d59d9403c931bc21eda88eac55732b1d765013919c2adbe2120a6cacc48619ba",
      "status": "success",
      "buildurl": "https://github.com/r-universe/emilhvitfeldt/actions/runs/25982468383"
    }
  ]
}