Regular expression to preserve the case pattern (capitalization)

Is there a regex that preserves the case pattern, in the vein of `\U` and `\L`?

In the example below, I want to convert "date" to "month" while maintaining the capitalization used in `input`:

   from        to
  "date" ~~> "month"
  "Date" ~~> "Month"
  "DATE" ~~> "MONTH"

I am currently using three nested calls to `sub` to accomplish this.

# Sample input covering the three capitalizations, and the result we want.
input <- c("date", "Date", "DATE")
expected.out <- c("month", "Month", "MONTH")

# One case-specific substitution per variant, applied in the same order as
# the nested form (innermost first): upper case, then title case, then lower.
step_upper <- sub("DATE", "MONTH", input)
step_title <- sub("Date", "Month", step_upper)
sub("date", "month", step_title)

The goal is to have only one `pattern` and only one `replacement`, such as

# Sketch of the desired form: a single call with one pattern and one replacement.
# NOTE(review): as written, the pattern matches only lower-case "date" and \\U
# upper-cases the whole replacement, so this call alone does not yield expected.out.
gsub("(date)", "\\Umonth", input, perl=TRUE) 

which would give the desired output.

+4
source share
3 answers

This is one of those cases where I think a `for` loop is justified:

# Example text containing all three capitalizations of the word.
input <- rep("Here are a date, a Date, and a DATE", times = 2)

# Parallel vectors: pat[i] is replaced by ret[i].
pat <- c("date", "Date", "DATE")
ret <- c("month", "Month", "MONTH")

# One case-sensitive pass over the text per pattern/replacement pair.
for (i in seq_along(pat)) {
  input <- gsub(pattern = pat[i], replacement = ret[i], x = input)
}
input
#[1] "Here are a month, a Month, and a MONTH" 
#[2] "Here are a month, a Month, and a MONTH"

Alternatively, as @flodel suggested, the same thing can be written functionally with `Reduce`:

# Fold over the (pattern, replacement) pairs built by Map(), feeding the
# text produced by each gsub() into the next one; starts from `input`.
Reduce(
  function(txt, pair) gsub(pair[1], pair[2], txt),
  Map(c, pat, ret),
  init = input
)

For benchmarks of these solutions, see @TylerRinker's answer.

+6

Using the `gsubfn` package, you can avoid the nested `sub` calls and do this in a single call.

library(gsubfn)

x <- "Here we have a date, a different Date, and a DATE"

# Match "date" in any capitalization; each matched string is then looked up
# by name in the replacement list, so the case of the match picks the case of
# the replacement. Use TRUE rather than T, since T can be reassigned.
gsubfn(
  "date",
  list("date" = "month", "Date" = "Month", "DATE" = "MONTH"),
  x,
  ignore.case = TRUE
)
# [1] "Here we have a month, a different Month, and a MONTH"
+5

Here is a `qdap` approach using `mgsub`, which takes the vectors of patterns and replacements directly:

library(qdap)

# Example text plus parallel vectors of patterns and their replacements.
input <- rep("Here are a date, a Date, and a DATE", times = 2)
pat <- c("date", "Date", "DATE")
ret <- c("month", "Month", "MONTH")

# mgsub() takes the pattern and replacement vectors in a single call.
mgsub(pat, ret, input)

## [1] "Here are a month, a Month, and a MONTH"
## [2] "Here are a month, a Month, and a MONTH"

Benchmarks of the approaches above:

# Larger input so that the timing differences are measurable.
input <- rep("Here are a date, a Date, and a DATE", 1000)

library(microbenchmark)

# Time the four approaches on the same input. `pat` and `ret` are the
# pattern/replacement vectors defined earlier; gsubfn and qdap must be loaded.
(op <- microbenchmark(
  GSUBFN = gsubfn(
    "date",
    list("date" = "month", "Date" = "Month", "DATE" = "MONTH"),
    input,
    ignore.case = TRUE
  ),
  QDAP = mgsub(pat, ret, input),
  REDUCE = Reduce(
    function(str, args) gsub(args[1], args[2], str),
    Map(c, pat, ret),
    init = input
  ),
  # The loop must actually run: wrapping it in `function() { ... }` would only
  # time the creation of a closure. local() keeps the loop from overwriting
  # `input`, so every iteration and every other entry sees the original text.
  FOR = local({
    txt <- input
    for (i in seq_along(pat)) {
      txt <- gsub(pat[i], ret[i], txt)
    }
    txt
  }),
  times = 100L
))

## Unit: milliseconds
##    expr        min         lq     median         uq        max neval
##  GSUBFN 682.549812 815.908385 847.361883 925.385557 1186.66743   100
##    QDAP  10.499195  12.217805  13.059149  13.912157   25.77868   100
##  REDUCE   4.267602   5.184986   5.482151   5.679251   28.57819   100
##     FOR   4.244743   5.148132   5.434801   5.870518   10.28833   100

enter image description here

+4

All Articles