# Parse a column that mixes two date formats ("%m/%d/%Y" and "%d.%m.%Y").
# Each as.Date() call yields NA wherever its format does not match.
a <- as.Date(data$initialDiagnose, format = "%m/%d/%Y") # NA when format is not "%m/%d/%Y"
b <- as.Date(data$initialDiagnose, format = "%d.%m.%Y") # NA when format is not "%d.%m.%Y"

# Combine both while keeping their positions. The same mask, is.na(a), is
# used on both sides so that every replacement comes from the matching
# position in `b`. Values that match neither format simply stay NA instead
# of shifting the remaining dates out of place.
a[is.na(a)] <- b[is.na(a)]

data$initialDiagnose <- a # Put it back in your data frame
data$initialDiagnose
# [1] "2009-01-14" "2005-09-22" "2010-04-21" "2010-01-28" "2009-01-09" "2005-03-28" "2005-01-04" "2005-01-04" "2010-09-17" "2010-01-03"
The same approach can be adapted to the situation where you have three (or more) different formats:
# Example input: a factor column mixing three different date formats.
data$initialDiagnose
# [1] 14.01.2009 9/22/2005 12 Mar 97 4/21/2010 28.01.2010 09.01.2009 3/28/2005
# Levels: 09.01.2009 12 Mar 97 14.01.2009 28.01.2010 3/28/2005 4/21/2010 9/22/2005

# Parse a vector whose elements may each use any one of several date formats.
#
# Args:
#   data:    Vector of date strings (character or factor), passed to as.Date().
#   formats: Character vector of strptime-style format strings to try.
#
# Returns:
#   A Date vector with one element per element of `data`. Elements that match
#   none of `formats` are NA. If an element matches more than one format, the
#   format listed LAST in `formats` wins.
#   With an empty `formats`, every element is NA.
multidate <- function(data, formats) {
  # No formats to try: nothing can be parsed, so return all-NA dates rather
  # than failing inside the loop.
  if (length(formats) == 0L) {
    return(structure(rep(NA_real_, length(data)), class = "Date"))
  }

  # The first format provides the initial result; each later format then
  # overwrites the positions it manages to parse.
  parsed <- as.Date(data, format = formats[1L])
  for (fmt in formats[-1L]) {
    candidate <- as.Date(data, format = fmt)
    matched <- !is.na(candidate)
    parsed[matched] <- candidate[matched]
  }
  parsed
}

# NOTE(review): "%b" matches abbreviated month names in the current locale;
# confirm the session locale matches the language of the data.
data$initialDiagnose <- multidate(
  data$initialDiagnose,
  c("%m/%d/%Y", "%d.%m.%Y", "%d %b %y")
)
data$initialDiagnose
# [1] "2009-01-14" "2005-09-22" "1997-03-12" "2010-04-21" "2010-01-28" "2009-01-09" "2005-03-28"
plannapus
source share