R: how to access the elements of arrays loaded into a data frame column

I loaded the data from a CSV file into an R data frame, which looks like this:

> head(mydata)
  row lengthArray                         sports num_runs percent_runs
1   0           4               [24, 18, 24, 18]        0            0
2   1          10 [2, 2, 2, 2, 2, 2, 2, 2, 2, 2]        0            0
3   2           4                   [0, 0, 0, 0]        0            0
4   3           2                         [0, 0]        0            0
5   4           2                       [18, 18]        0            0
6   5           1                            [0]        0            0

I can access the integer columns and get their types without problems, but I cannot figure out how to access the individual values in the sports column:

> class(mydata[4,3])
[1] "factor" 
>  string_factor = mydata[1,3]
> string_factor
[1] [24, 18, 24, 18]
6378 Levels: [0] [0, 0] [0, 0, 0] [0, 0, 0, 0] ... [9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9]
> class(string_factor)
[1] "factor"
> string_factor_numeric = as.numeric(string_factor)
> string_factor_numeric
[1] 5181

I think R's best answer would be "not to do this," but this is how the data arrives, so I wonder how I can extract these numbers from each array so that I can use them.

I should also note that the approach from the question "Convert data.frame columns from factors to characters" did not give any error message, but had no effect: the array column continued to be classified as a factor.

UPDATE: from the comments, you can see this can get you somewhere:
# Replace the third column (sports) with its character representation.
mydata[[3]] <- as.character(mydata[[3]])

However, this still will not lead you to an array with individually accessible elements.

+4
3

dput:

# Reproducible copy of the six rows shown by head(mydata), equivalent to the
# dput() dump: `sports` starts out as a factor whose levels are the bracketed
# strings in alphabetical order.
mydata <- data.frame(
  row = 0:5,
  lengthArray = c(4L, 10L, 4L, 2L, 2L, 1L),
  sports = factor(
    c(
      "[24, 18, 24, 18]",
      "[2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",
      "[0, 0, 0, 0]",
      "[0, 0]",
      "[18, 18]",
      "[0]"
    ),
    levels = c(
      "[0, 0, 0, 0]",
      "[0, 0]",
      "[0]",
      "[18, 18]",
      "[2, 2, 2, 2, 2, 2, 2, 2, 2, 2]",
      "[24, 18, 24, 18]"
    )
  ),
  num_runs = rep(0L, 6L),
  percent_runs = rep(0L, 6L)
)

# Turn the factor into plain strings so the text can be manipulated.
mydata$sports <- as.character(mydata$sports)

Next, strip the square brackets and spaces (using stringr):

library(stringr)
# Remove every "[", "]" and space so only the comma-separated values remain.
mydata$sports <- str_remove_all(mydata$sports, pattern = "\\[|\\]| ")

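Then split the comma-separated values into one column per element; rows with fewer values are padded with NA (using tidyr):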

library(tidyr)
# Split sports into one column per array position. The longest array
# (max of lengthArray) determines how many sportN columns are created.
mydata <- separate(
  mydata,
  sports,
  into = paste0("sport", seq_len(max(mydata$lengthArray))),
  sep = ",",
  extra = "drop",
  # Rows with fewer values are padded with NA on the right; stating this
  # explicitly avoids a "missing pieces" warning for every short row.
  fill = "right"
)

mydata
#  row lengthArray sport1 sport2 sport3 sport4 sport5 sport6 sport7 sport8 sport9 sport10 num_runs percent_runs
#1   0           4     24     18     24     18   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>        0            0
#2   1          10      2      2      2      2      2      2      2      2      2       2        0            0
#3   2           4      0      0      0      0   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>        0            0
#4   3           2      0      0   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>        0            0
#5   4           2     18     18   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>        0            0
#6   5           1      0   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>        0            0
+1

Alternatively, using splitstackshape:

library(splitstackshape)
library(dplyr)
# Drop the square brackets from sports, then spread the comma-separated
# values into one column per element.
cSplit(
  mutate(mydata, sports = gsub("\\[|\\]", "", sports)),
  splitCols = "sports",
  sep = ",",
  direction = "wide"
)

which gives:

   row lengthArray num_runs percent_runs sports_01 sports_02 sports_03 sports_04 sports_05 sports_06 sports_07 sports_08 sports_09 sports_10
1:   0           4        0            0        24        18        24        18        NA        NA        NA        NA        NA        NA
2:   1          10        0            0         2         2         2         2         2         2         2         2         2         2
3:   2           4        0            0         0         0         0         0        NA        NA        NA        NA        NA        NA
4:   3           2        0            0         0         0        NA        NA        NA        NA        NA        NA        NA        NA
5:   4           2        0            0        18        18        NA        NA        NA        NA        NA        NA        NA        NA
6:   5           1        0            0         0        NA        NA        NA        NA        NA        NA        NA        NA        NA

Following @thelatemail's suggestion, you can instead keep the values in a list column:

library(stringi)
library(dplyr)
# Extract every whole number (a run of one or more digits) from each string,
# so "24" stays "24" instead of being split into "2" and "4".
# The result is a list column holding one character vector per row; wrap the
# extraction in lapply(..., as.integer) if numeric values are needed.
df <- mydata %>%
  mutate(sports = stri_extract_all_regex(sports, "[0-9]+"))

which gives:

> str(df)
#'data.frame':  6 obs. of  5 variables:
# $ row         : int  0 1 2 3 4 5
# $ lengthArray : int  4 10 4 2 2 1
# $ sports      :List of 6
#  ..$ : chr  "24" "18" "24" "18"
#  ..$ : chr  "2" "2" "2" "2" ...
#  ..$ : chr  "0" "0" "0" "0"
#  ..$ : chr  "0" "0"
#  ..$ : chr  "18" "18"
#  ..$ : chr "0"
# $ num_runs    : int  0 0 0 0 0 0
# $ percent_runs: int  0 0 0 0 0 0

Individual elements can then be accessed like this:

> df$sports[[1]][1] #first element of first list
#[1] "24"
+4

Starting from this example data:

# Example input: the sports arrays are quoted so read.table() keeps each one
# as a single field.
text <- "
row lengthArray                            sports num_runs percent_runs
   0           4               '[24, 18, 24, 18]'        0            0
   1          10 '[2, 2, 2, 2, 2, 2, 2, 2, 2, 2]'        0            0
   2           4                   '[0, 0, 0, 0]'        0            0
   3           2                         '[0, 0]'        0            0
   4           2                       '[18, 18]'        0            0
   5           1                            '[0]'        0            0"

data <- read.table(text = text, header = TRUE)

# Make sure sports is character (it may be a factor), strip the square
# brackets, split on commas and convert each piece to a number. This yields a
# list column with one numeric vector per row. Splitting the text directly
# avoids eval(parse()), which would execute whatever code the file contains.
data$sports <- as.character(data$sports)
data$sports <- lapply(
  strsplit(gsub("\\[|\\]", "", data$sports), ",", fixed = TRUE),
  as.numeric
)

# Individual elements of a row's array are now directly accessible:
data$sports[[1]][[3]]
# [1] 24
0

All Articles