, stringi. , . stringi R, , , , . , . , stringi . ( , , data.frame .)
: Rui Barradas .
(i) , stringi, , , , .
(ii) , , , ( ). , , . , , , ..
, stringi .
Please correct me if I have overlooked anything that makes this an unfair comparison. In particular, I think the stringr solution could be improved, but I am not very familiar with the package, so I kept the solution as it was proposed.
library(stringi)
library(stringr)
library(microbenchmark)
# Sample inputs for the benchmark. Each value has the shape
# <prefix>outputtablep<digits>q<digits>t<digits>c<digits>.
strings <- c(
  "rfoutputtablep7q10000t20000c100",
  "rfoutputtablep7q1000t20000c100",
  "svmLinear2outputtablep7q20000t20000c100",
  "svmLinear2outputtablep7q5000t20000c100"
)
# Split every element of `string` on a set of regex separators and return the
# pieces as a data frame with one row per input string.
#
# Arguments:
#   string            character vector to split.
#   splititems        character vector of regex separators; they are joined
#                     with "|" into a single alternation.
#   colidschar        indices of the columns to keep as character; every
#                     other column is converted with as.numeric().
#   firstcolname      name given to the first column.
#   replsplit_tonames regex that is removed from each element of `splititems`
#                     to derive the names of the remaining columns.
#
# Returns a data.frame whose columns are named
# c(firstcolname, <splititems with replsplit_tonames removed>).
split_to_df <- function(string, splititems, colidschar, firstcolname, replsplit_tonames) {
  # Split the `string` argument (not a global) on the combined pattern.
  pieces <- stri_split_regex(string, paste(splititems, collapse = "|"))
  data <- as.data.frame(do.call(rbind, pieces), stringsAsFactors = FALSE)
  names(data) <- c(
    firstcolname,
    stri_replace_all_regex(splititems, replsplit_tonames, "")
  )

  # seq_len() stays empty for a zero-column frame, unlike 1:ncol(data).
  numericcols <- setdiff(seq_len(ncol(data)), colidschar)
  # List-style indexing keeps a data frame even when a single column is
  # selected, so lapply() always iterates over columns rather than cells.
  data[numericcols] <- lapply(data[numericcols], as.numeric)
  data
}
# Benchmark candidate: the generic stringi helper split_to_df() applied to the
# global `strings`. The result is returned invisibly.
stringi_approach_complete <- function() {
  # Separators; the look-ahead only splits on a letter that precedes a digit.
  separators <- c("outputtablep(?=\\d)", "q(?=\\d)", "t(?=\\d)", "c(?=\\d)")
  parsed <- split_to_df(
    string = strings,
    splititems = separators,
    colidschar = 1,
    firstcolname = "A",
    replsplit_tonames = "\\(.*\\)|outputtable"
  )
  invisible(parsed)
}
# Benchmark candidate: a minimal stringi split of the global `strings` into a
# data frame with columns A, p, q, t, c (all left as character).
#
# "outputtable" and the following "p" are matched as a single separator.
# Splitting on them separately leaves an empty piece between the two, which
# produces six columns for the five names assigned below.
#
# NOTE(review): the last expression is a replacement assignment, so the value
# returned (invisibly) is the names vector, not the data frame. That is kept
# unchanged here.
stringi_approach_compare <- function() {
  pieces <- stri_split_regex(
    strings,
    "outputtablep(?=\\d)|q(?=\\d)|t(?=\\d)|c(?=\\d)"
  )
  data <- as.data.frame(do.call(rbind, pieces))
  names(data) <- c("A", "p", "q", "t", "c")
}
# Benchmark candidate: stringr extraction. For each of the letters p, q, t, c
# the first "<letter><digits>" match in every element of the global `strings`
# is located, then only its digits are kept. The resulting data frame has the
# columns p, q, t, c and is returned invisibly.
stringr_approach <- function() {
  # Two-step extraction: tagged number first, then the digits inside it.
  digits_of <- function(tagged_pattern) {
    tagged <- str_extract(strings, tagged_pattern)
    str_extract(tagged, "\\d+")
  }
  res <- data.frame(
    p = digits_of("p\\d+"),
    q = digits_of("q\\d+"),
    t = digits_of("t\\d+"),
    c = digits_of("c\\d+")
  )
  invisible(res)
}
# Benchmark candidate: base R strsplit() on the global `strings`, giving a
# data frame with columns A, p, q, t, c (all left as character).
#
# NOTE(review): the last expression is a replacement assignment, so the value
# returned (invisibly) is the names vector, not the data frame. That is kept
# unchanged here.
base_approach1 <- function() {
  pieces <- strsplit(strings, 'outputtable|p|q|t|c')
  res <- do.call(rbind, pieces)
  # Column 2 is the empty piece between "outputtable" and the "p" that follows
  # it. drop = FALSE keeps a matrix when `strings` has a single element;
  # otherwise the row collapses to a vector and the names no longer fit.
  res <- as.data.frame(res[, -2, drop = FALSE])
  names(res) <- c("A", "p", "q", "t", "c")
}
# Benchmark candidate: base R strsplit() on the global `strings`, where
# "outputtable" plus the non-digit after it is consumed as one separator, so
# no empty piece has to be dropped. The data frame with columns A, p, q, t, c
# is returned invisibly.
base_approach2 <- function() {
  pieces <- strsplit(strings, 'outputtable\\D|p|q|t|c')
  frame <- data.frame(do.call(rbind, pieces))
  names(frame) <- c("A", "p", "q", "t", "c")
  invisible(frame)
}
# Time all five candidates with microbenchmark's default settings.
microbenchmark(
  base_approach1(),
  base_approach2(),
  stringi_approach_compare(),
  stringr_approach(),
  stringi_approach_complete()
)