R shows NA, although a value is present

I have two POSIXlt time columns that contain no NA values, yet NA values appear when I check:

> sum(is.na(check$start)) [1] 19 > sum(is.na(check$end)) [1] 23 

Data is present in the cells, so why is this happening? I have heard that this can happen with POSIXlt, but even when I convert the column to POSIXct the behavior is very strange. How can this be solved?

 > as.POSIXct(check$start, format = "%Y-%m-%d %H:%M:%S", tz = "CST6CDT") [1] NA "2014-03-09 01:35:01 CST" NA "2014-03-09 01:53:30 CST" NA [6] NA NA NA NA "2014-03-09 04:17:11 CDT" [11] NA NA "2015-03-08 01:54:43 CST" NA NA [16] NA NA NA NA NA [21] NA NA NA > dput(check) structure(list(start = structure(list(sec = c(24, 1, 27, 30, 8, 21, 40, 9, 43, 11, 31, 43, 43, 55, 39, 54, 41, 19, 2, 35, 6, 54, 40), min = c(45L, 35L, 14L, 53L, 36L, 37L, 47L, 48L, 54L, 17L, 57L, 53L, 54L, 3L, 52L, 22L, 34L, 28L, 41L, 42L, 52L, 52L, 53L), hour = c(2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), mday = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), mon = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), year = c(114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L), wday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), yday = c(67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L), isdst = c(-1L, 0L, -1L, 0L, -1L, -1L, -1L, -1L, -1L, 1L, -1L, -1L, 0L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L), zone = c("", "CST", "", "CST", "", "", "", "", "", "CDT", "", "", "CST", "", "", "", "", "", "", "", "", "", ""), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt"), tzone = c("CST6CDT", "CST", "CDT")), end = structure(list( sec = c(7, 59, 38, 45, 29, 46, 39, 14, 52, 29, 37, 
5, 23, 41, 10, 43, 46, 46, 53, 24, 57, 13, 51), min = c(55L, 47L, 30L, 2L, 43L, 51L, 53L, 56L, 54L, 54L, 57L, 56L, 6L, 3L, 13L, 29L, 37L, 32L, 48L, 47L, 55L, 55L, 55L), hour = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), mday = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), mon = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), year = c(114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 114L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L, 115L), wday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), yday = c(67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L), isdst = c(-1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L, -1L), zone = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_ )), .Names = c("sec", "min", "hour", "mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"), class = c("POSIXlt", "POSIXt"), tzone = c("CST6CDT", "CST", "CDT"))), .Names = c("start", "end"), row.names = c(1559963L, 1560092L, 1560157L, 1560220L, 1560240L, 1560247L, 1560252L, 1560253L, 1560255L, 1560258L, 1560260L, 2004432L, 2004583L, 2004591L, 2004594L, 2004596L, 2004598L, 2004599L, 2004600L, 2004603L, 2004609L, 2004610L, 2004611L), class = "data.frame") 
+7
datetime r posixct
source share
1 answer

How does is.na work in this context?

 > is.na.POSIXlt function (x) is.na(as.POSIXct(x)) <bytecode: 0x0000000014232980> 

How does as.POSIXct behave here?

 > as.POSIXct(check$start) [1] NA "2014-03-09 01:35:01 CST" NA "2014-03-09 01:53:30 CST" [5] NA NA NA NA [9] NA "2014-03-09 04:17:11 CDT" NA NA [13] "2015-03-08 01:54:43 CST" NA NA NA [17] NA NA NA NA [21] NA NA NA 

Good, but why?

Let me check the as.POSIXct documentation:

Any conversion that needs to go between the two date-time classes requires a time zone: conversion from "POSIXlt" to "POSIXct" will validate times in the selected time zone. One issue is what happens at transitions to and from DST, for example in the UK

We'll see:

 > check$start$zone [1] "" "CST" "" "CST" "" "" "" "" "" "CDT" "" "" "CST" "" "" "" "" "" "" "" [21] "" "" "" 

Here be dragons: no time zone is recorded except for 4 entries, so as.POSIXct cannot determine whether the dates are valid (do they fall within the DST change or not?), as you can see:

 > check$start$isdst [1] -1 0 -1 0 -1 -1 -1 -1 -1 1 -1 -1 0 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 

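Thus, the conversion from POSIXlt (the columns in your data frame) to POSIXct cannot establish that the date-time is valid and returns NA. Note that every affected record has hour 2 on 2014-03-09 or 2015-03-08, the days on which DST begins in CST6CDT, so those local times (02:00–02:59) are skipped by the clock change; the records with hour 1 or 4 convert without problems.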

One way to fix this is to enforce the time zone for all records:

 > check$start <- as.POSIXlt(strftime(check$start,tz="CST"),tz="CST6CDT") > is.na(check$start) [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE 
+8
source share

All Articles