Bar chart with a "negative" logarithmic scale in R

I have a dataset with some outliers, like the following

x <- rnorm(1000,0,20) x <- c(x, 500, -500) 

If we construct this on the linear scale of the X axis, then we see that

 histogram(x) 

non log x-axis

I developed a great way to put it in the log scale using this useful thread: how to use the log scale for the y axis of a histogram in R? :

 mat <- data.frame(x) ggplot(ee, aes(x = xx)) + geom_histogram(colour="darkblue", size=1, fill="blue") + scale_x_log10() 

log x-axis

However, I would like the x-axis labels from this second example to match the labels of the first example, except for the "negative log" view - i.e. the first tick (moving from the center to the left) could be -1, then the next one can be -10, the next one -100, but all the same. It makes sense?

+7
source share
4 answers

I'm not sure I understand your goal, but if you want the log-like transform to still have zeros or negative values, the inverse hyperbolic sine transform of asinh() often a good option. It has a log-like value for large values ​​and is defined for all real values. See this question at stats.stackexchange.com for a discussion, details, and other options.

If this is an acceptable approach, you can create your own scale for ggplot. The code below shows how to create and use a custom scale (with custom gaps), as well as visualizing the asinh () transform.

 library(ggplot2) library(scales) limits <- 100 step <- 0.005 demo <- data.frame(x=seq(from=-1*limits,to=limits,by=step)) asinh_trans <- function(){ trans_new(name = 'asinh', transform = function(x) asinh(x), inverse = function(x) sinh(x)) } ggplot(demo,aes(x,x))+geom_point(size=2)+ scale_y_continuous(trans = 'asinh',breaks=c(-100,-50,-10,-1,0,1,10,50,100))+ theme_bw() 

enter image description here

 ggplot(demo,aes(x,x))+geom_point(size=2)+ scale_x_continuous(trans = 'asinh',breaks=c(0,1,10,50,100))+ scale_y_log10(breaks=c(0,1,10,50,100))+ # zero won't plot xlab("asinh() scale")+ylab("log10 scale")+ theme_bw() 

enter image description here

+14
source

Understanding that the question is quite old, I decided to answer it anyway, since I ran into the same problem.

I see that some of the answers above misunderstood your original question. I think this is the right visualization question, and I will describe below my solution, which I hope will be useful to others.

My approach was to use ggplot and create a custom log transform for the x and y axis (as well as custom fault generators)

 library(ggplot2) library(scales) # Create custom log-style x axis transformer (...,-10,-3,-1,0,1,3,10,...) custom_log_x_trans <- function() trans_new("custom_log_x", transform = function (x) ( sign(x)*log(abs(x)+1) ), inverse = function (y) ( sign(y)*( exp(abs(y))-1) ), domain = c(-Inf,Inf)) # Custom log x breaker (...,-10,-3,-1,0,1,3,10,...) custom_x_breaks <- function(x) { range <- max(abs(x), na.rm=TRUE) return (sort( c(0, sapply(0:log10(range), function(z) (10^z) ), sapply(0:log10(range/3), function(z) (3*10^z) ), sapply(0:log10(range), function(z) (-10^z) ), sapply(0:log10(range/3), function(z) (-3*10^z) ) ))) } # Create custom log-style y axis transformer (0,1,3,10,...) custom_log_y_trans <- function() trans_new("custom_log_y", transform = function (x) ( log(abs(x)+1) ), inverse = function (y) ( exp(abs(y))-1 ), domain = c(0,Inf)) # Custom log y breaker (0,1,3,10,...) custom_y_breaks <- function(x) { max_y <- length(x) range <- max(abs(max_y), na.rm=TRUE) return (sort( c(0, sapply(0:log10(range), function(z) (10^z) ), sapply(0:log10(range/3), function(z) (3*10^z) ) ))) } ggplot(data=mat) + geom_histogram(aes(x=x,fill=..count..), binwidth = 1, color="black", size=0.1) + scale_fill_gradient("Count", low = "steelblue", high = "red") + coord_trans(x="custom_log_x",y="custom_log_y") + scale_x_continuous(breaks = custom_x_breaks(mat$x)) + scale_y_continuous(breaks = custom_y_breaks(mat$x)) + theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + theme_bw() 

which gives me the following chart.

enter image description here

Note that:

  • The plot also includes a coloring scheme to visually display the absolute value of each bar.
  • bins become thinner as x increases (side effect of log conversion)

In any case, two indicators are clearly visible .

+2
source

I found a way to trick him. I say cheat because it actually displays the negative and positive parts of the data separately. Thus, you cannot compare negative and positive data. But only can show the distribution of negative and positive parts separately.

And one of the problems is that your data has zero values, they will not be displayed on the chart.

 reverselog_trans <- function(base = exp(1)) { trans <- function(x) -log(x, base) inv <- function(x) base^(-x) trans_new(paste0("reverselog-", format(base)), trans, inv, log_breaks(base = base), domain = c(1e-100, Inf)) } quartz(); dist1 <- ggplot(data=df.meltFUAC) + geom_point(alpha=1,aes(x=deltaU.deltaUltrasensitivity,y=deltaF.deltaFitness, colour=deltaF.w_c)) + scale_x_continuous(name = expression(Delta * S[ult]), limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05), labels=c("1e-01","1e-03","1e-05")) + scale_y_continuous(name = expression(paste(Delta, " Fitness")),trans = "log10", limits = c(1e-7,1), breaks=c(1e-01,1e-03,1e-05), labels=c("1e-01","1e-03","1e-05")) + theme_bw() + theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(), panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(), panel.grid.minor=element_blank(),plot.background=element_blank(), plot.margin=unit(c(0,0,0,-11),"mm")) dist2 <- ggplot(data=df.meltFUAC, aes(x=-deltaU.deltaUltrasensitivity,y=deltaF.deltaFitness, colour=deltaF.w_c)) + geom_point(alpha=1) + scale_x_continuous(name = expression(Delta * sqrt(S[ult] %.% S[amp])),limits=c(1,1e-7), trans = reverselog_trans(10),breaks=c(1e-01,1e-03,1e-05), labels=c("-1e-01","-1e-03","-1e-05")) + scale_y_continuous(name = expression(paste(Delta, " Fitness")),trans = "log10", limits = c(1e-7,1), breaks=c(1e-01,1e-03,1e-05), labels=c("1e-01","1e-03","1e-05")) + theme_bw() + theme(legend.position = "none",strip.background=element_blank(),panel.border=element_rect(colour = "black"), axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(), axis.line.y=element_line(colour="black",size=1,linetype="solid"),axis.title.x=element_blank(), panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(), plot.margin=unit(c(0,-8,0,2.5),"mm")) hist0 <- ggplot(data=df.meltFUAC, aes(deltaF.deltaFitness,fill=deltaF.w_c)) + #geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.5, aes(colour=deltaF.w_c)) + scale_x_continuous(name = expression(paste(Delta, " Fitness")), limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05), labels=c("1e-01","1e-03","1e-05")) + scale_y_continuous(name = "Density", limits=c(0,0.6)) + theme_bw() + theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.x=element_blank(), panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(), panel.grid.minor=element_blank(),plot.background=element_blank(), plot.margin=unit(c(0,5,2.5,-2.5),"mm")) + coord_flip() hist1 <- ggplot(data=df.meltFUAC, aes(deltaU.deltaUltrasensitivity,fill=deltaF.w_c)) + #geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.5, aes(colour=deltaF.w_c)) + scale_x_continuous(name = expression(Delta * S[ult]), limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05), labels=c("1e-01","1e-03","1e-05")) + scale_y_continuous(name = "Density", limits=c(0,0.6)) + theme_bw() + theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(), axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.x=element_blank(), axis.line.x=element_line(colour="black",size=1,linetype="solid"), panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(), panel.grid.minor=element_blank(),plot.background=element_blank(), plot.margin=unit(c(5,0,-2.5,2),"mm")) hist2 <- ggplot(data=df.meltFUAC, aes(-deltaU.deltaUltrasensitivity,fill=deltaF.w_c)) + #geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.5, aes(colour=deltaF.w_c)) + scale_x_continuous(name = expression(Delta * S[ult]),limits=c(1,1e-7), trans = reverselog_trans(10),breaks=c(1e-01,1e-03,1e-05), labels=c("-1e-01","-1e-03","-1e-05")) + scale_y_continuous(name = "Density", limits=c(0,0.6)) + theme_bw() + theme(legend.position = "none",strip.background=element_blank(),panel.border=element_rect(colour = "black"), axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank(), axis.title.x=element_blank(), axis.line.y=element_line(colour="black",size=1,linetype="solid"), axis.line.x=element_line(colour="black",size=1,linetype="solid"), panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(), plot.margin=unit(c(5,-8,-2.5,2.5),"mm")) grid.newpage(); pushViewport(viewport(layout = grid.layout(3, 3, widths = unit(c(4,4,2),"null"), heights=unit(c(2,7.5,0.5),"null")))); vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y); print(dist2, vp = vplayout(2, 1)); print(dist1, vp = vplayout(2, 2)); print(hist2, vp = vplayout(1, 1)); print(hist1, vp = vplayout(1, 2)); print(hist0, vp = vplayout(2, 3)); grid.text(expression(Delta * Ultrasensitivity),vp = vplayout(3,1:2),x = unit(0.55, "npc"), y = unit(0.9, "npc"),gp=gpar(fontsize=12, col="black")); dev.copy2pdf(file=sprintf("%s/_dist/dist_hist_deltaF_deltaU_wc_01vs10.pdf", resultDir)); dev.off(); 

Here is the graph he got (but you need to manually enable the legend):

enter image description here

Or simpler:

 reverselog_trans <- function(base = exp(1)) { trans <- function(x) -log(x, base) inv <- function(x) base^(-x) trans_new(paste0("reverselog-", format(base)), trans, inv, log_breaks(base = base), domain = c(1e-100, Inf)) } quartz(); hist1 <- ggplot(deltaF, aes(deltaFitness,fill=w_c)) + guides(fill=guide_legend(title=expression(omega[c]))) + geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.05, aes(colour=w_c)) + scale_x_continuous(name = expression(paste(Delta, " Fitness")),trans = "log10"); hist1 <- hist1 + scale_y_continuous(name = "Density", limits=c(0,1)); #hist1 <- hist1 + theme(panel.background=element_blank(),panel.border=element_blank(),axis.line.x=element_blank(),axis.line.y=element_line(colour="black",linetype="solid",size=1),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(5,5,0,5),"mm")); hist1 <- hist1 + theme_bw(); hist1 <- hist1 + theme(strip.background=element_blank(),panel.border=element_rect(colour = "black"),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(5,5,0,5),"mm")); hist1 <- hist1 + scale_color_discrete(name=expression(omega[c]));# + geom_vline(xintercept=0, colour="grey", size = 1);# + geom_hline(yintercept=0, colour="grey", size = 0.5); hist2 <- ggplot(deltaU, aes(deltaUltrasensitivity,fill=w_c)) + geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.05, aes(colour=w_c)) + scale_x_continuous(name = expression(paste(Delta, " Ultrasensitivity")), limits=c(1e-7,1),trans = "log10",breaks=c(1e-01,1e-03,1e-05),labels=c("1e-01","1e-03","1e-05")); hist2 <- hist2 + scale_y_continuous(name = "Density",limits=c(0,1)) ;#+ geom_vline(xintercept=0, colour="grey", size = 1);# + geom_hline(yintercept=0, colour="grey", size = 0.5); #hist2 <- hist2 + theme(legend.position = "none", axis.title.x=element_blank(),panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,5,0,-7.5),"mm")); hist2 <- hist2 + theme_bw(); hist2 <- hist2 + theme(legend.position = "none", axis.title.x=element_blank(),strip.background=element_blank(),panel.border=element_rect(colour = "black"),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,5,0,-7.5),"mm")); # + ggtitle("Positive part") hist3 <- ggplot(deltaU, aes(-deltaUltrasensitivity,fill=w_c)) + geom_histogram(alpha = 0.5, aes(y=..density..),position = 'identity') + geom_density(alpha = 0.05, aes(colour=w_c)) + scale_x_continuous(name = expression(paste(Delta, " Ultrasensitivity")), limits=c(1,1e-7),trans = reverselog_trans(10),breaks=c(1e-01,1e-03,1e-05),labels=c("-1e-01","-1e-03","-1e-05")); hist3 <- hist3 + scale_y_continuous(name = "Density", limits=c(0,1));# + geom_hline(yintercept=0, colour="black", size = 0.5); #hist3 <- hist3 + theme(legend.position = "none",panel.background=element_blank(),axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),axis.line.y=element_line(colour="black",size=1,linetype="solid"),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,-7.5,0,5),"mm")); hist3 <- hist3 + theme_bw(); hist3 <- hist3 + theme(legend.position = "none",strip.background=element_blank(),panel.border=element_rect(colour = "black"),axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_blank(),axis.line.y=element_line(colour="black",size=1,linetype="solid"),axis.title.x=element_blank(),panel.grid.major=element_blank(),panel.grid.minor=element_blank(),plot.background=element_blank(),plot.margin=unit(c(0,-7.5,0,5),"mm")); # + ggtitle("Negative part") grid.newpage(); pushViewport(viewport(layout = grid.layout(4, 2, widths = unit(c(5,5),"null"),heights=unit(c(4.6,0.4,4.6,0.4),"null")))); vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y); print(hist1, vp = vplayout(1, 1:2)); # key is to define vplayout grid.text(expression(paste(Delta, " Fitness")),vp = vplayout(2,1:2),x = unit(0.5, "npc"), y = unit(0.9, "npc"),gp=gpar(fontsize=12, col="black")); print(hist3, vp = vplayout(3, 1)); print(hist2, vp = vplayout(3, 2)); grid.text(expression(paste(Delta, " Ultrasensitivity")),vp = vplayout(4,1:2),x = unit(0.5, "npc"), y = unit(0.9, "npc"),gp=gpar(fontsize=12, col="black")); dev.copy2pdf(file=sprintf("%s/deltaF_deltaU_wc_01vs10.pdf", resultDir)); dev.off(); 

Here is the chart I received:

enter image description here

+1
source

Why suffer with ggplot2 solution? Your first plot was done using the histogram lattice histogram , and this is where you should stay. Just apply the logarithmic conversion directly to the histogram function, use the nint argument to indicate the number of histogram bins, and the type argument to select "count" or "density." I think you have everything you need there, but maybe I am missing some important detail of your question ...

 library(lattice) histogram(log10(x), nint=50, type="count") 

enter image description here

0
source

All Articles