Overlaying geom_points on geom_histogram or stat_bin

I used ggplot to plot a histogram (or step using stat_bin ) and stat_bin few points to it using geom_point .

Here is a base implementation:

 # Base-graphics reference: draw a histogram of df$val and mark three randomly
 # chosen ids on top of the bar that contains each of them.
 library(plotrix)  # provides color.id()

 set.seed(10)
 df <- data.frame(id = LETTERS, val = rnorm(length(LETTERS)))
 selected.ids <- sample(LETTERS, 3, replace = FALSE)

 # Compute the bins without drawing; the counts are needed before plotting.
 h <- hist(df$val, plot = FALSE, breaks = 10)

 # One colour name per selected id: the first name color.id() returns for each
 # rainbow() colour.
 cols <- vapply(rainbow(length(selected.ids)),
                function(x) color.id(x)[1],
                character(1))
 selected.df <- data.frame(id = selected.ids, col = cols,
                           stringsAsFactors = FALSE)

 # match() returns positions in the order of selected.df$id, so every x lines
 # up with its own id. Subsetting with `df$id %in% selected.ids` would return
 # the values in df's row order instead and pair them with the wrong ids.
 selected.df$x <- df$val[match(selected.df$id, df$id)]
 selected.df <- selected.df[order(selected.df$x), ]

 # Height of the bar containing each x. hist() bins are right-closed with the
 # lowest break included by default, so findInterval() is told to do the same.
 selected.df$y <- h$counts[findInterval(selected.df$x, h$breaks,
                                        left.open = TRUE,
                                        rightmost.closed = TRUE)]
 selected.df$col <- factor(selected.df$col, levels = cols)

 plot(h)
 # Zero-length segments with a thick line width act as point markers.
 # The factor is converted back to character because base graphics reads a
 # factor `col` as integer codes (palette entries), not as colour names.
 segments(x0 = selected.df$x, x1 = selected.df$x,
          y0 = selected.df$y, y1 = selected.df$y,
          cex = 18, lwd = 8, col = as.character(selected.df$col))

which gives:

enter image description here

However, when I try ggplot :

 # ggplot2 attempt at the same figure: histogram with the selected points on top.
 # Uses `df` and `selected.df` as built for the base-graphics version.
 # NOTE: bins = 10 is kept on purpose. selected.df$y was computed from the
 # hist() bins, and geom_histogram(bins = 10) does not use the same breaks,
 # which is what makes the points appear offset from the bars in this plot.
 library(ggplot2)

 ggplot(df, aes(x = val)) +
   geom_histogram(bins = 10, colour = "black", alpha = 0, fill = "#FF6666") +
   geom_point(data = selected.df,
              aes(x = x, y = y, colour = factor(col)),
              size = 2) +
   scale_colour_manual(
     values = levels(selected.df$col),
     # Labels are applied in level order, while selected.df is sorted by x, so
     # look up the id that owns each colour level instead of passing
     # selected.df$id as-is.
     labels = selected.df$id[match(levels(selected.df$col), selected.df$col)],
     name = "id"
   )

Points and histogram are offset: enter image description here

Ideally, I would like to build it with a step:

 # Step-line variant of the ggplot2 attempt: binned counts drawn with
 # geom = "step" and the selected points on top.
 # Uses `df` and `selected.df` as built for the base-graphics version.
 # NOTE: bins = 10 is kept on purpose; selected.df$y still comes from the
 # hist() bins, so the points do not sit on the step line here.
 library(ggplot2)

 ggplot(df, aes(x = val)) +
   stat_bin(geom = "step", bins = 10) +
   geom_point(data = selected.df,
              aes(x = x, y = y, colour = factor(col)),
              size = 2) +
   scale_colour_manual(
     values = levels(selected.df$col),
     # Labels are applied in level order, while selected.df is sorted by x, so
     # look up the id that owns each colour level instead of passing
     # selected.df$id as-is.
     labels = selected.df$id[match(levels(selected.df$col), selected.df$col)],
     name = "id"
   )

Which is similar to geom_histogram

enter image description here

but I would also like the ends of the line to touch the line y = 0.

So, did I get it right step by step using stat_bin?

+5
source share
1 answer

Your selected.df y.values is executed using breaks hist() , but geom_histogram() uses different breaks . ( geom_histogram(bins) not equivalent to hist(breaks) to be sure). In addition, on the chart, a step up occurs at average breaks . ggplot_build(gg.obj)$data (or plot(gg.obj)$data ) gives you some information, breaks, calculations, etc.

geom_histgram
The method is basically the same as base.plot . If you want the same result as base.plot , use breaks = h$breaks instead of bars = 10 .

 # a common part to base and ggplot2
 # Builds `df` (26 ids with random values) and `selected.df` (three sampled
 # ids with a colour name and their x value, sorted by x).
 library(plotrix)  # provides color.id()

 set.seed(10)
 df <- data.frame(id = LETTERS, val = rnorm(length(LETTERS)))
 selected.ids <- sample(LETTERS, 3, replace = FALSE)

 # One colour name per selected id: the first name color.id() returns for each
 # rainbow() colour.
 cols <- vapply(rainbow(length(selected.ids)),
                function(x) color.id(x)[1],
                character(1))
 selected.df <- data.frame(id = selected.ids, col = cols,
                           stringsAsFactors = FALSE)

 # match() returns positions in the order of selected.df$id, so every x lines
 # up with its own id. Subsetting with `df$id %in% selected.ids` would return
 # the values in df's row order instead and pair them with the wrong ids.
 selected.df$x <- df$val[match(selected.df$id, df$id)]
 selected.df <- selected.df[order(selected.df$x), ]
 selected.df$col <- factor(selected.df$col, levels = cols)

 # Histogram with overlaid points, taking the bar heights from ggplot2's own
 # bins so the points land on the bars. Uses `df` and `selected.df` from the
 # common part.
 library(ggplot2)

 # (1) make a histogram
 # base; h <- hist(df$val, plot = F, breaks = 10)
 g <- ggplot(df, aes(x = val)) +
   geom_histogram(bins = 10, colour = "black", alpha = 0, fill = "#FF6666")

 # (2) get its breaks
 # base; h$breaks
 g.data <- ggplot_build(g)$data[[1]]
 g.breaks <- c(g.data$xmin, tail(g.data$xmax, n = 1))

 # (3) get counts of specific x values
 # base; selected.df$y <- h$counts[findInterval(selected.df$x, h$breaks)]
 # stat_bin() closes bins on the right by default, so findInterval() is told
 # to use right-closed intervals as well.
 selected.df$y <- g.data$count[findInterval(selected.df$x, g.breaks,
                                            left.open = TRUE,
                                            rightmost.closed = TRUE)]

 # (4) draw
 g +
   geom_point(data = selected.df,
              aes(x = x, y = y, colour = factor(col)),
              size = 2) +
   scale_colour_manual(
     values = levels(selected.df$col),
     # Labels are applied in level order, while selected.df is sorted by x, so
     # look up the id that owns each colour level instead of passing
     # selected.df$id as-is.
     labels = selected.df$id[match(levels(selected.df$col), selected.df$col)],
     name = "id"
   )

enter image description here

stat_bin
You can draw it just like geom_histgram . The important point is that up and down does not come from breaks, but from average values.

 # Step plot with overlaid points whose ends come down to y = 0.
 # Uses `df` and `selected.df` from the common part.
 library(ggplot2)

 selected.df2 <- selected.df

 # (1) make a step plot
 s <- ggplot(df, aes(x = val)) +
   stat_bin(geom = "step", bins = 10)

 # (2) get breaks and its middle values
 s.data <- ggplot_build(s)$data[[1]]
 s.breaks <- c(s.data$xmin, tail(s.data$xmax, n = 1))
 s.mid.breaks <- s.data$x

 # (3) get counts of specific x values using middle values of breaks.
 # findInterval() returns 0 for an x left of the first midpoint; indexing with
 # 0 would silently drop that element and break the column assignment. Such a
 # point lies on the y = 0 lead-in added in step (4), so a 0 count is prepended
 # and the index shifted by one.
 selected.df2$y <- c(0, s.data$count)[
   findInterval(selected.df2$x, s.mid.breaks) + 1
 ]

 # (4) add a new levels into breaks to start and end at y=0
 s.add.breaks <- c(
   s.breaks[1] - 1.0E-6,                          # making lower levels is easy
   s.breaks,
   tail(s.breaks, n = 1) + diff(s.breaks[1:2])    # upper need the same range
 )

 # (5) draw
 ggplot(df, aes(x = val)) +
   stat_bin(geom = "step", breaks = s.add.breaks) +
   geom_point(data = selected.df2,
              aes(x = x, y = y, colour = factor(col)),
              size = 2) +
   scale_colour_manual(
     values = levels(selected.df2$col),
     # Labels are applied in level order, while selected.df2 is sorted by x, so
     # look up the id that owns each colour level instead of passing
     # selected.df2$id as-is.
     labels = selected.df2$id[match(levels(selected.df2$col),
                                   selected.df2$col)],
     name = "id"
   )

enter image description here

+2
source

All Articles