{
  "_id": "6a1d72be1d7bb097a0a50e9d",
  "Package": "textTinyR",
  "Type": "Package",
  "Title": "Text Processing for Small or Big Data Files",
  "Version": "1.1.8",
  "Date": "2023-12-04",
  "Authors@R": "c( person(given = \"Lampros\", family = \"Mouselimis\", email = \"mouselimislampros@gmail.com\", role = c(\"aut\", \"cre\"), comment = c(ORCID = \"https://orcid.org/0000-0002-8024-1546\")))",
  "BugReports": "https://github.com/mlampros/textTinyR/issues",
  "URL": "https://github.com/mlampros/textTinyR",
  "Description": "It offers functions for splitting, parsing, tokenizing and\ncreating a vocabulary for big text data files. Moreover, it\nincludes functions for building a document-term matrix and\nextracting information from those (term-associations, most\nfrequent terms). It also embodies functions for calculating\ntoken statistics (collocations, look-up tables, string\ndissimilarities) and functions to work with sparse matrices.\nLastly, it includes functions for Word Vector Representations\n(i.e. 'GloVe', 'fasttext') and incorporates functions for the\ncalculation of (pairwise) text document dissimilarities. The\nsource code is based on 'C++11' and exported in R through the\n'Rcpp', 'RcppArmadillo' and 'BH' packages.",
  "License": "GPL-3",
  "Copyright": "inst/COPYRIGHTS",
  "SystemRequirements": "libarmadillo: apt-get install -y libarmadillo-dev\n(deb)",
  "Encoding": "UTF-8",
  "VignetteBuilder": "knitr",
  "RoxygenNote": "7.2.3",
  "NeedsCompilation": "yes",
  "Packaged": {
    "Date": "2026-06-01 11:47:06 UTC",
    "User": "root"
  },
  "Repository": "https://mlampros.r-universe.dev",
  "Date/Publication": "2023-12-05 08:38:29 UTC",
  "RemoteUrl": "https://github.com/mlampros/texttinyr",
  "RemoteRef": "HEAD",
  "RemoteSha": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
  "Author": "Lampros Mouselimis [aut, cre] (ORCID:\n<https://orcid.org/0000-0002-8024-1546>)",
  "Maintainer": "Lampros Mouselimis <mouselimislampros@gmail.com>",
  "MD5sum": "b8c27f133621f118eec650c64aa74ce3",
  "_user": "mlampros",
  "_type": "src",
  "_file": "textTinyR_1.1.8.tar.gz",
  "_fileid": "0b3e578819e3f6d33e2e5bacc1a42b4ec06ca283ddd2975722d87dc0baa030ab",
  "_filesize": 1115389,
  "_sha256": "0b3e578819e3f6d33e2e5bacc1a42b4ec06ca283ddd2975722d87dc0baa030ab",
  "_created": "2026-06-01T11:47:06.000Z",
  "_published": "2026-06-01T11:53:34.856Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 78845545235,
      "time": 205,
      "config": "linux-devel-arm64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7330699609"
    },
    {
      "job": 78845545267,
      "time": 183,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7330691765"
    },
    {
      "job": 78845545319,
      "time": 221,
      "config": "linux-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7330707160"
    },
    {
      "job": 78845545272,
      "time": 232,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7330708833"
    },
    {
      "job": 78845545236,
      "time": 191,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7330694118"
    },
    {
      "job": 78845545286,
      "time": 272,
      "config": "macos-oldrel-x86_64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7330721724"
    },
    {
      "job": 78845545244,
      "time": 150,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7330680997"
    },
    {
      "job": 78845545365,
      "time": 303,
      "config": "macos-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7330731011"
    },
    {
      "job": 78844851527,
      "time": 265,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7330629841"
    },
    {
      "job": 78845545192,
      "time": 143,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7330679006"
    },
    {
      "job": 78845545210,
      "time": 259,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7330717809"
    },
    {
      "job": 78845545249,
      "time": 212,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7330701870"
    },
    {
      "job": 78845545336,
      "time": 260,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7330717882"
    }
  ],
  "_buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/mlampros/texttinyr",
  "_commit": {
    "id": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
    "author": "Lampros Mouselimis <mouselimislampros@gmail.com>",
    "committer": "Lampros Mouselimis <mouselimislampros@gmail.com>",
    "message": "updated the tic.yml file\n",
    "time": 1701765509
  },
  "_maintainer": {
    "name": "Lampros Mouselimis",
    "email": "mouselimislampros@gmail.com",
    "login": "mlampros",
    "description": "Search (a little bit) and you'll find...",
    "uuid": 3055514,
    "orcid": "0000-0002-8024-1546"
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.2.3",
      "role": "Depends"
    },
    {
      "package": "Matrix",
      "role": "Depends"
    },
    {
      "package": "Rcpp",
      "role": "LinkingTo"
    },
    {
      "package": "RcppArmadillo",
      "version": ">= 0.7.8",
      "role": "LinkingTo"
    },
    {
      "package": "BH",
      "role": "LinkingTo"
    },
    {
      "package": "Rcpp",
      "version": ">= 0.12.10",
      "role": "Imports"
    },
    {
      "package": "R6",
      "role": "Imports"
    },
    {
      "package": "data.table",
      "role": "Imports"
    },
    {
      "package": "utils",
      "role": "Imports"
    },
    {
      "package": "testthat",
      "role": "Suggests"
    },
    {
      "package": "covr",
      "role": "Suggests"
    },
    {
      "package": "knitr",
      "role": "Suggests"
    },
    {
      "package": "rmarkdown",
      "role": "Suggests"
    }
  ],
  "_owner": "mlampros",
  "_selfowned": true,
  "_usedby": 1,
  "_updates": [],
  "_tags": [],
  "_topics": [
    "bh",
    "boost",
    "cpp11",
    "processing",
    "rcpp",
    "rcpparmadillo",
    "text",
    "openblas",
    "cpp",
    "openmp"
  ],
  "_stars": 39,
  "_contributors": [
    {
      "user": "monopteryx",
      "count": 48,
      "uuid": 114278455
    },
    {
      "user": "mlampros",
      "count": 25,
      "uuid": 3055514
    }
  ],
  "_userbio": {
    "uuid": 3055514,
    "type": "user",
    "name": "Lampros Mouselimis",
    "description": "Search (a little bit) and you'll find..."
  },
  "_downloads": {
    "count": 346,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/textTinyR"
  },
  "_devurl": "https://github.com/mlampros/texttinyr",
  "_searchresults": 260,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "extra/textTinyR.html",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/mlampros/texttinyr",
  "_realowner": "mlampros",
  "_cranurl": true,
  "_releases": [
    {
      "version": "1.0.0",
      "date": "2017-01-07"
    },
    {
      "version": "1.0.1",
      "date": "2017-01-10"
    },
    {
      "version": "1.0.2",
      "date": "2017-01-20"
    },
    {
      "version": "1.0.3",
      "date": "2017-01-29"
    },
    {
      "version": "1.0.4",
      "date": "2017-03-28"
    },
    {
      "version": "1.0.5",
      "date": "2017-04-01"
    },
    {
      "version": "1.0.6",
      "date": "2017-05-03"
    },
    {
      "version": "1.0.7",
      "date": "2017-06-05"
    },
    {
      "version": "1.0.8",
      "date": "2017-10-31"
    },
    {
      "version": "1.0.9",
      "date": "2018-01-16"
    },
    {
      "version": "1.1.0",
      "date": "2018-04-03"
    },
    {
      "version": "1.1.1",
      "date": "2018-05-17"
    },
    {
      "version": "1.1.2",
      "date": "2018-07-25"
    },
    {
      "version": "1.1.3",
      "date": "2019-04-14"
    },
    {
      "version": "1.1.4",
      "date": "2021-05-05"
    },
    {
      "version": "1.1.5",
      "date": "2021-10-13"
    },
    {
      "version": "1.1.6",
      "date": "2021-10-21"
    },
    {
      "version": "1.1.7",
      "date": "2021-10-26"
    },
    {
      "version": "1.1.8",
      "date": "2023-12-04"
    }
  ],
  "_exports": [
    "batch_compute",
    "big_tokenize_transform",
    "bytes_converter",
    "cluster_frequency",
    "COS_TEXT",
    "cosine_distance",
    "Count_Rows",
    "dense_2sparse",
    "dice_distance",
    "dims_of_word_vecs",
    "Doc2Vec",
    "JACCARD_DICE",
    "levenshtein_distance",
    "load_sparse_binary",
    "matrix_sparsity",
    "read_characters",
    "read_rows",
    "save_sparse_binary",
    "select_predictors",
    "sparse_Means",
    "sparse_Sums",
    "sparse_term_matrix",
    "TEXT_DOC_DISSIM",
    "text_file_parser",
    "text_intersect",
    "token_stats",
    "tokenize_transform_text",
    "tokenize_transform_vec_docs",
    "utf_locale",
    "vocabulary_parser"
  ],
  "_help": [
    {
      "page": "batch_compute",
      "title": "Compute batches",
      "topics": [
        "batch_compute"
      ]
    },
    {
      "page": "big_tokenize_transform",
      "title": "String tokenization and transformation for big data sets",
      "topics": [
        "big_tokenize_transform"
      ]
    },
    {
      "page": "bytes_converter",
      "title": "bytes converter of a text file ( KB, MB or GB )",
      "topics": [
        "bytes_converter"
      ]
    },
    {
      "page": "cluster_frequency",
      "title": "Frequencies of an existing cluster object",
      "topics": [
        "cluster_frequency"
      ]
    },
    {
      "page": "COS_TEXT",
      "title": "Cosine similarity for text documents",
      "topics": [
        "COS_TEXT"
      ]
    },
    {
      "page": "cosine_distance",
      "title": "cosine distance of two character strings (each string consists of more than one words)",
      "topics": [
        "cosine_distance"
      ]
    },
    {
      "page": "Count_Rows",
      "title": "Number of rows of a file",
      "topics": [
        "Count_Rows"
      ]
    },
    {
      "page": "dense_2sparse",
      "title": "convert a dense matrix to a sparse matrix",
      "topics": [
        "dense_2sparse"
      ]
    },
    {
      "page": "dice_distance",
      "title": "dice similarity of words using n-grams",
      "topics": [
        "dice_distance"
      ]
    },
    {
      "page": "dims_of_word_vecs",
      "title": "dimensions of a word vectors file",
      "topics": [
        "dims_of_word_vecs"
      ]
    },
    {
      "page": "Doc2Vec",
      "title": "Conversion of text documents to word-vector-representation features ( Doc2Vec )",
      "topics": [
        "Doc2Vec"
      ]
    },
    {
      "page": "JACCARD_DICE",
      "title": "Jaccard or Dice similarity for text documents",
      "topics": [
        "JACCARD_DICE"
      ]
    },
    {
      "page": "levenshtein_distance",
      "title": "levenshtein distance of two words",
      "topics": [
        "levenshtein_distance"
      ]
    },
    {
      "page": "load_sparse_binary",
      "title": "load a sparse matrix in binary format",
      "topics": [
        "load_sparse_binary"
      ]
    },
    {
      "page": "matrix_sparsity",
      "title": "sparsity percentage of a sparse matrix",
      "topics": [
        "matrix_sparsity"
      ]
    },
    {
      "page": "read_characters",
      "title": "read a specific number of characters from a text file",
      "topics": [
        "read_characters"
      ]
    },
    {
      "page": "read_rows",
      "title": "read a specific number of rows from a text file",
      "topics": [
        "read_rows"
      ]
    },
    {
      "page": "save_sparse_binary",
      "title": "save a sparse matrix in binary format",
      "topics": [
        "save_sparse_binary"
      ]
    },
    {
      "page": "select_predictors",
      "title": "Exclude highly correlated predictors",
      "topics": [
        "select_predictors"
      ]
    },
    {
      "page": "sparse_Means",
      "title": "RowMens and colMeans for a sparse matrix",
      "topics": [
        "sparse_Means"
      ]
    },
    {
      "page": "sparse_Sums",
      "title": "RowSums and colSums for a sparse matrix",
      "topics": [
        "sparse_Sums"
      ]
    },
    {
      "page": "sparse_term_matrix",
      "title": "Term matrices and statistics ( document-term-matrix, term-document-matrix)",
      "topics": [
        "sparse_term_matrix"
      ]
    },
    {
      "page": "TEXT_DOC_DISSIM",
      "title": "Dissimilarity calculation of text documents",
      "topics": [
        "TEXT_DOC_DISSIM"
      ]
    },
    {
      "page": "text_file_parser",
      "title": "text file parser",
      "topics": [
        "text_file_parser"
      ]
    },
    {
      "page": "text_intersect",
      "title": "intersection of words or letters in tokenized text",
      "topics": [
        "text_intersect"
      ]
    },
    {
      "page": "token_stats",
      "title": "token statistics",
      "topics": [
        "token_stats"
      ]
    },
    {
      "page": "tokenize_transform_text",
      "title": "String tokenization and transformation ( character string or path to a file )",
      "topics": [
        "tokenize_transform_text"
      ]
    },
    {
      "page": "tokenize_transform_vec_docs",
      "title": "String tokenization and transformation ( vector of documents )",
      "topics": [
        "tokenize_transform_vec_docs"
      ]
    },
    {
      "page": "utf_locale",
      "title": "utf-locale for the available languages",
      "topics": [
        "utf_locale"
      ]
    },
    {
      "page": "vocabulary_parser",
      "title": "returns the vocabulary counts for small or medium ( xml and not only ) files",
      "topics": [
        "vocabulary_parser"
      ]
    }
  ],
  "_readme": "https://github.com/mlampros/texttinyr/raw/HEAD/README.md",
  "_rundeps": [
    "BH",
    "data.table",
    "lattice",
    "Matrix",
    "R6",
    "Rcpp",
    "RcppArmadillo"
  ],
  "_sysdeps": [
    {
      "shlib": "libblas",
      "package": "libopenblas0-pthread",
      "source": "openblas",
      "version": "0.3.26+ds-1ubuntu0.1",
      "name": "openblas",
      "homepage": "https://www.openblas.net/",
      "description": "Optimized BLAS (linear algebra) library (shared lib, pthread)"
    },
    {
      "shlib": "libstdc++",
      "package": "libstdc++6",
      "source": "gcc",
      "version": "14.2.0-4ubuntu2~24.04.1",
      "name": "c++",
      "homepage": "http://gcc.gnu.org/",
      "description": "GNU Standard C++ Library v3"
    },
    {
      "shlib": "libgomp",
      "package": "libgomp1",
      "source": "gcc",
      "version": "14.2.0-4ubuntu2~24.04.1",
      "name": "openmp",
      "homepage": "http://gcc.gnu.org/",
      "description": "GCC OpenMP (GOMP) support library"
    }
  ],
  "_vignettes": [
    {
      "source": "functionality_of_textTinyR_package.Rmd",
      "filename": "functionality_of_textTinyR_package.html",
      "title": "Functionality of the textTinyR package",
      "author": "Lampros Mouselimis",
      "engine": "knitr::rmarkdown",
      "headings": [
        "classes",
        "functions",
        "big_tokenize_transform class",
        "word cloud",
        "word vectors",
        "sparse_term_matrix class",
        "token_stats class",
        "helper functions for sparse_matrices",
        "tokenization",
        "utility functions"
      ],
      "created": "2017-01-04 16:37:04",
      "modified": "2021-10-29 09:18:39",
      "commits": 8
    },
    {
      "source": "word_vectors_doc2vec.Rmd",
      "filename": "word_vectors_doc2vec.html",
      "title": "Word vectors - doc2vec - text clustering",
      "author": "Lampros Mouselimis",
      "engine": "knitr::rmarkdown",
      "headings": [
        "textTinyR - fastTextR - doc2vec - kmeans - cluster_medoids"
      ],
      "created": "2018-04-03 21:05:35",
      "modified": "2021-10-29 09:18:39",
      "commits": 4
    }
  ],
  "_score": 7.4831592097169795,
  "_indexed": true,
  "_nocasepkg": "texttinyr",
  "_universes": [
    "mlampros"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.1.8",
      "date": "2026-06-01T11:50:07.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "b4e5a6a2f8a4fb9f56a8dd960b50dfb7f8c917d9af1a6a1626fe8aebda047f95",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.1.8",
      "date": "2026-06-01T11:49:53.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "53b269492614d13443d0da613dd300d90c68f1b5f6650568fab15529d849e18d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.1.8",
      "date": "2026-06-01T11:50:24.000Z",
      "distro": "noble",
      "arch": "aarch64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "823dcdfa70982132c1fd3a6fd3364fff8043d68e966eecf95804c7c95651fd67",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.1.8",
      "date": "2026-06-01T11:50:10.000Z",
      "distro": "noble",
      "arch": "x86_64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "a355957bbe1bd46149ffb917d33ab015b2c14057e14f48e6358d232b0e275607",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.1.8",
      "date": "2026-06-01T11:49:55.000Z",
      "arch": "aarch64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "05879004a93799d22e64fcd0524a15953a7f2af7bcfe37585d8784c1ade6686b",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.1.8",
      "date": "2026-06-01T11:50:33.000Z",
      "arch": "x86_64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "3c56dd9921d9056142b22dd8d5ad2f5d2b209d54671ea84ecb163f7b25ecc7b0",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.1.8",
      "date": "2026-06-01T11:49:29.000Z",
      "arch": "aarch64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "e737ec75494c227731a6d1d03c67383637391711f9a3c278d44e77d86a728470",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.1.8",
      "date": "2026-06-01T11:51:00.000Z",
      "arch": "x86_64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "dce5c79c7cea05eb2883b4700a1ef40504f9dc9f2df6bf0ec57c19159f98b51d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.1.8",
      "date": "2026-06-01T11:50:08.000Z",
      "arch": "emscripten",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "d6415b81dc6fa2358f6f331fe5215a2eccafab263744004607403374538110ff",
      "status": "success",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.1.8",
      "date": "2026-06-01T11:49:42.000Z",
      "arch": "x86_64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "63cd52b77a23375d7f78f4b98b71717e2193c2b7c383922b9f75bbaa38bc2f42",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.1.8",
      "date": "2026-06-01T11:49:26.000Z",
      "arch": "x86_64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "404a96a7585025c7261c2186f08bbf33efe5cffd9968ebe40c5a300ce29a127d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "1.1.8",
      "date": "2026-06-01T11:49:43.000Z",
      "arch": "x86_64",
      "commit": "11c7a8e6690f1fbe2e59df5d0dd451c1bd54a74e",
      "fileid": "4c4ed9bb226ef4d5d4fbe9bcd9d752ba742f582385c5f9bb38bea69dc80b8a0d",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/mlampros/actions/runs/26752784460"
    }
  ]
}