R summarizes data with unique features

I have a large table in the following format:

# Example input: one row per genomic site (Chrom + Site) and one column of
# values per organ.
Data <- data.frame(
  Chrom = c("chr1", "chr1", "chr1", "chr4", "chr4", "chr6"),
  Site  = c(100, 200, 400, 140, 300, 400),
  Heart = c(20, 100, 0, 35, 92, 100),
  Brain = c(30, 40, 55, 100, 0, 100),
  Liver = c(100, 55, 20, 90, 0, 0),
  Lungs = c(100, 0, 80, 40, 30, 0)
)

This gives:

> Data
  Chrom Site Heart Brain Liver Lungs
1  chr1  100    20    30   100   100
2  chr1  200   100    40    55     0
3  chr1  400     0    55    20    80
4  chr4  140    35   100    90    40
5  chr4  300    92     0     0    30
6  chr6  400   100   100     0     0

I want to make a figure similar to this published figure ( http://www.nature.com/ncomms/2015/150218/ncomms7363/fig_tab/ncomms7363_F1.html ):

enter image description here

Basically, for each row (identified by Chrom and Site), I want to see how many intermediate values there are. I define intermediate values here as those from 15 to 85. Then, for each organ, I would like to know how many rows are intermediate in all organs, only in this organ, or shared with two or three organs.

+4
source share
2 answers

An approach using data.table:

Setup

library(data.table)

# Example input: one row per genomic site (Chrom + Site) and one column of
# values per organ.
Data <- data.frame(
  Chrom = c("chr1", "chr1", "chr1", "chr4", "chr4", "chr6"),
  Site  = c(100, 200, 400, 140, 300, 400),
  Heart = c(20, 100, 0, 35, 92, 100),
  Brain = c(30, 40, 55, 100, 0, 100),
  Liver = c(100, 55, 20, 90, 0, 0),
  Lungs = c(100, 0, 80, 40, 30, 0)
)

# Convert the data frame to a data.table so the `DT[i, j, by]` syntax can be used.
DT <- data.table(Data)

# Test, element by element, whether a value lies in the closed range 15 to 85.
#
# x: numeric vector of values.
# Returns a logical vector the same length as `x`.
isintermediate <- function(x) {
  at_least_lower <- x >= 15
  at_most_upper <- x <= 85
  at_least_lower & at_most_upper
}


# Keep the site identifiers and replace every organ column with a logical flag
# telling whether that organ's value is intermediate at the site.
DI <- DT[, .(
  Chrom,
  Site,
  Heart = isintermediate(Heart),
  Brain = isintermediate(Brain),
  Liver = isintermediate(Liver),
  Lungs = isintermediate(Lungs)
)]

This creates a data.table `DI` that looks like this:

> DI
   Chrom Site Heart Brain Liver Lungs
1:  chr1  100  TRUE  TRUE FALSE FALSE
2:  chr1  200 FALSE  TRUE  TRUE FALSE
3:  chr1  400 FALSE  TRUE  TRUE  TRUE
4:  chr4  140  TRUE FALSE FALSE  TRUE
5:  chr4  300 FALSE FALSE FALSE  TRUE
6:  chr6  400 FALSE FALSE FALSE FALSE

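Since TRUE and FALSE are treated as 1 and 0 in arithmetic, the logical columns (Heart, Brain, Liver, Lungs) can simply be added together.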

Number of intermediate organs per Chrom + Site:

# NoI is Number Intermediate

> DI[, list(NoI = Heart + Brain + Liver + Lungs), by = c("Chrom","Site")]
   Chrom Site NoI
1:  chr1  100   2
2:  chr1  200   2
3:  chr1  400   3
4:  chr4  140   2
5:  chr4  300   1
6:  chr6  400   0

To count per organ, first reshape the table into long format (one row per site and organ) using reshape2:

library(reshape2)

# Reshape to long format (one row per site and organ) and keep only the rows
# whose organ is flagged as intermediate.
DA <- melt(
  DI,
  id.vars = c("Chrom", "Site"),
  measure.vars = c("Heart", "Brain", "Liver", "Lungs"),
  variable.name = "variable",
  value.name = "value"
)[value == TRUE]

This gives:

> DA
    Chrom Site variable value
 1:  chr1  100    Heart  TRUE
 2:  chr4  140    Heart  TRUE
 3:  chr1  100    Brain  TRUE
 4:  chr1  200    Brain  TRUE
 5:  chr1  400    Brain  TRUE
 6:  chr1  200    Liver  TRUE
 7:  chr1  400    Liver  TRUE
 8:  chr1  400    Lungs  TRUE
 9:  chr4  140    Lungs  TRUE
10:  chr4  300    Lungs  TRUE

Only the rows that are TRUE are kept, which is what `[value == TRUE]` does.

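Next, count how many organs are intermediate at each site. This can be done with `.N` and `by=`, merging the count back in: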

# Attach to every row the number of intermediate organs at its site (IAcc),
# computed as the row count per Chrom/Site and joined back on those keys.
DA <- merge(
  x = DA,
  y = DA[, .(IAcc = .N), by = .(Chrom, Site)],
  by = c("Chrom", "Site")
)

This gives:

> DA
    Chrom Site variable value IAcc
 1:  chr1  100    Heart  TRUE    2
 2:  chr1  100    Brain  TRUE    2
 3:  chr1  200    Brain  TRUE    2
 4:  chr1  200    Liver  TRUE    2
 5:  chr1  400    Brain  TRUE    3
 6:  chr1  400    Liver  TRUE    3
 7:  chr1  400    Lungs  TRUE    3
 8:  chr4  140    Heart  TRUE    2
 9:  chr4  140    Lungs  TRUE    2
10:  chr4  300    Lungs  TRUE    1

Finally, count the rows for each combination of organ and IAcc, using `table`:

# Cross-tabulate organ (variable) against IAcc and store the counts (N) as a
# long data.table with one row per organ/IAcc combination.
Output <- data.table(
  table(
    DA[, .(variable, IAcc)]
  )
)
> Output
    variable IAcc N
 1:    Heart    1 0
 2:    Brain    1 0
 3:    Liver    1 0
 4:    Lungs    1 1
 5:    Heart    2 2
 6:    Brain    2 2
 7:    Liver    2 1
 8:    Lungs    2 1
 9:    Heart    3 0
10:    Brain    3 1
11:    Liver    3 1
12:    Lungs    3 1

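IAcc is the number of organs that are intermediate at a given Chrom and Site, and N is the number of sites at which that organ is intermediate with that IAcc.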

This can then be plotted (as a stacked bar chart):

library(ggplot2)

# Stacked bars: one bar per organ, bar height taken directly from N, and
# segments coloured by IAcc.
ggplot(data = Output, mapping = aes(variable, N, fill = IAcc)) +
  geom_bar(stat = "identity")

enter image description here

+4

In base R, something like this:

# Flag values that are intermediate, i.e. within 15 to 85.
# The bounds are inclusive so that a value of exactly 15 or 85 counts as
# intermediate, matching the "from 15 to 85" definition of the range.
#
# x: numeric vector of values.
# Returns a logical vector the same length as `x` (NA where `x` is NA).
is_intermediate <- function(x) {
  x >= 15 & x <= 85
}
# Number of intermediate organ values per row.
# Only the organ columns are tested: Chrom and Site identify the row, and Site
# is a position rather than a measurement, so it must not be counted.
organ_cols <- setdiff(names(Data), c("Chrom", "Site"))
# vapply pins the result to a logical matrix (rows = sites, columns = organs).
res <- vapply(Data[organ_cols], is_intermediate, logical(nrow(Data)))
rowSums(res)

With dplyr + tidyr, something like this:

# Count intermediate organ values per Chrom/Site.
# The organ columns are gathered into long format (one row per site and organ).
# The range must be written `Heart:Lungs`: column names are case-sensitive and
# the data has no `lungs` column.
Data %>%
  gather(organ, value, Heart:Lungs) %>%
  group_by(Chrom, Site) %>%
  summarise(n_intermediate = sum(is_intermediate(value)))

This gives the number of intermediate values per Chrom/Site.

To get the totals per organ instead, something like this:

# Count, for each organ, the number of sites whose value is intermediate.
# `across()` replaces the deprecated `mutate_each()` / `summarise_each()` /
# `funs()` helpers; the result is still one row with one column per organ.
Data %>%
  select(-Chrom, -Site) %>%
  summarise(across(everything(), ~ sum(is_intermediate(.x))))

.

+2

All Articles