I reworked @eddi's kickass answer like this:
- This is a function.
- It does not clutter up the workspace (it just loads data.table).
- I renamed the columns to shorter ones and added a few more.
- Classes of list items are also checked.
- I return the name of the object and dataset
- Warnings raised by packages can be hidden (`silent = TRUE`, the default).
You just run this (if you installed data.table):
ds <- dataset_summary()
It works with the `caret` package, which was problematic (see the change history):
subset(ds,Package == "caret")
You can easily subset and search for a specific data set; the classes of list elements and of data.frame columns are counted.
subset(ds,class == 'list' & df > 0,select=-c(2,4))
# Package Item class nrow ncol char fact ord num int list df
# 225 ecodist iris.fit list NA NA 0 0 0 1 0 0 1
# 238 ElemStatLearn orange10.test list NA NA 0 0 0 0 0 0 50
# 239 ElemStatLearn orange10.train list NA NA 0 0 0 0 0 0 50
# 240 ElemStatLearn orange4.test list NA NA 0 0 0 0 0 0 50
# 241 ElemStatLearn orange4.train list NA NA 0 0 0 0 0 0 50
# 346 lava missingdata list NA NA 0 0 0 0 0 0 4
Workspace is clean
ls()
# [1] "dataset_summary" "ds"
The only package the function attaches is data.table:
search()
# [1] ".GlobalEnv" "package:data.table" "package:Matrix" "package:sp" "package:timeSeries" "package:timeDate"
# [7] "tools:rstudio" "package:stats" "package:graphics" "package:grDevices" "package:utils" "package:datasets"
# [13] "package:methods" "Autoloads" "package:base"
#' Summarise the datasets shipped with installed packages
#'
#' Lists every dataset reported by `data(package = packages)`, loads each one
#' and records its class, its dimensions and, for data frames and lists, how
#' many of its elements/columns carry each of a few common classes.
#'
#' Datasets are loaded into a private scratch environment rather than the
#' global environment, so objects in the user's workspace are never
#' overwritten or removed, and each dataset is dropped again as soon as it has
#' been summarised.
#'
#' @param silent If `TRUE` (default), warnings are suppressed while the
#'   function runs; the previous `warn` option is restored on exit.
#' @param packages Character vector of package names whose datasets should be
#'   summarised. Defaults to every installed package.
#' @return A `data.frame` with one row per `Package`/`Item` pair and the
#'   columns `class`, `nrow`, `ncol`, `char`, `fact`, `ord`, `num`, `int`,
#'   `list` and `df`. The count columns are `NA` for objects that are neither
#'   a data frame nor a list.
#' @note Attaches the data.table package to the search path.
dataset_summary <- function(silent = TRUE,
                            packages = .packages(all.available = TRUE)) {
  if (silent) {
    old_opts <- options(warn = -1)
    on.exit(options(old_opts), add = TRUE)
  }

  library(data.table)

  # Scratch environment that receives the loaded datasets. Its parent is the
  # global environment (not the empty environment) because data files that
  # are R scripts are evaluated inside it and must be able to find base
  # functions.
  scratch <- new.env(parent = globalenv())

  index <- as.data.table(data(package = packages)$results)

  # Index entries look like "object (datafile)". Both right-hand sides are
  # evaluated against the original `Item` column: `Item` becomes the object
  # name, `Object` the name of the data file to hand to data().
  index[, `:=`(
    Item = sub(" \\(.*", "", Item),
    Object = sub(".*\\((.*)\\)", "\\1", Item)
  )]

  stats <- index[, {
    data(list = Object, package = Package, envir = scratch)
    # Look the object up by name in the scratch environment only; this also
    # copes with non-syntactic names and never picks up a same-named object
    # from the workspace or the search path.
    d <- get(Item, envir = scratch, inherits = FALSE)
    # Drop everything the data file created so memory use stays bounded.
    rm(list = ls(scratch, all.names = TRUE), envir = scratch)

    classes <- if (any(class(d) %in% c("data.frame", "list"))) {
      unlist(lapply(d, class))
    } else {
      # Comparing NA with a class name yields NA, so every count below is NA
      # for objects that are neither a data frame nor a list.
      NA_character_
    }

    .(
      class = paste(class(d), collapse = ","),
      nrow = if (!is.null(nrow(d))) nrow(d) else NA_integer_,
      ncol = if (!is.null(ncol(d))) ncol(d) else NA_integer_,
      char = sum(classes == "character"),
      fact = sum(classes == "factor"),
      ord = sum(classes == "ordered"),
      num = sum(classes == "numeric"),
      int = sum(classes == "integer"),
      list = sum(classes == "list"),
      df = sum(classes == "data.frame")
    )
  }, by = .(Package, Item)]

  as.data.frame(stats)
}