Highlight difference between two step data (derived from histogram) in R

48 Views Asked by At

:-)

I have two datasets where I derived histogram data from. These are both saved in two seperate arrays. The current sourcecode can be found below, along with the current plot.

# DEMO file for the awesome stackoverflow community
require(plotrix)

# clear the global environment ----
rm(list=ls())

# Assign demo data ----
data_T  <- c(rep(1,5),rep(2,10),rep(3,8),rep(4,2),rep(5,7),rep(6,2),rep(7,13),rep(9,7))
data_P  <- c(rep(1,1),rep(2,4),rep(3,1),rep(4,7),rep(5,12),rep(6,10),rep(7,9),rep(10,2))

# Setting the limits of the result data ----
uxlimit                     <- 10
lxlimit                     <- 0
classes                     <- (uxlimit-lxlimit)
xtics                       <- seq(lxlimit,uxlimit)
uylimit                     <- 20
lylimit                     <- 0
yrange                      <- seq(lylimit,uylimit, by = 5)

# filter out of necessary ----
data_T [ data_T > uxlimit ] <- NaN
data_T [ data_T < lxlimit ] <- NaN
data_T                      <- na.omit(data_T)

# Setting the x-label and y-label according to the requested spectrum ----
xlabel                      <- "x-value / x-unit"
ylabel                      <- "y-value / y-unit"

# generate histogram data ----
data_T_hist <- hist(data_T,
                    breaks = seq(lxlimit,uxlimit,l = classes+1),
                    plot = F)

data_P_hist <- hist(data_P,
                    breaks = seq(lxlimit,uxlimit,l = classes+1),
                    plot = F)

# Plot data_T_hist ----
plot(data_T_hist$breaks,
     c(data_T_hist$counts,0),
     xlab=xlabel,
     ylab=ylabel,
     ylim = c(lylimit,uylimit),
     xlim = c(lxlimit,uxlimit),
     main="Histogram data",
     axes=F,
     type="s",
     col="red",
     lwd=4,
     panel.first = grid(nx=NULL, ny=NULL))

# Plot data_P_hist ----
lines(data_P_hist$breaks,
      c(data_P_hist$counts,0),
        xlab=xlabel,
        ylab=ylabel,
        ylim = c(lylimit,uylimit),
        xlim = c(lxlimit, uxlimit),
        type="s",
        col="blue",
        lwd=4,
        lty=2)

# Frame all plots with a solid border ----
box(which = "plot", lty = "solid")

# Add legend to the top right of all plots ----
legend("topright",
       c("data_T_hist", "data_P_hist"),
       col=c("red","blue"),
       bg = "white",
       lwd=4)

# Setting the axes right ----
axis(1, at=xtics, labels=NULL, tick = TRUE)
axis(2, at=yrange, labels=yrange, las=1)

# FINISHED! ----
message ("Finished!")

The resulting plot is like this (and should be reproducable by you): I cant link images yet, so here is the link

So, this is okay for now.

However, I want to highlight the difference in the histograms visually. Of course I can calculate the difference, which is good because I need that too, but I also want to highlight the differences to show the the interesting areas. The final picture should look something like this Again the link

I would not necessarily need a color-discrimination between positive and negative difference, it would be nice though. I have no idea how to shade the areas in between the step data.

Can someone help me with this? One more thing, due to some restrictions, I am not allowed to use too many additional packages. I am using "R version 3.1.1 (2014-07-10) -- "Sock it to Me""

Thank you so much in advance!

2

There are 2 best solutions below

1
On BEST ANSWER

Not an elegant solution but nonetheless it does what you require.

 #Get pairwise min
    y_low <-c(pmin(data_P_hist$counts, data_T_hist$counts),0)

    #Get pairwise max 
    y_high <- c(pmax(data_P_hist$counts, data_T_hist$counts),0)



    for(i in 2:length(xtics)-1){
      rect(xtics[i], y_low[i], xtics[i+1],y_high[i], col="powderblue", border = NA)
    }

This is the plot you get:

enter image description here

Hope it helps!

0
On

To discriminate between positive and negative difference, I added a little condition to the code. It's working like a charm! The complete code is below

# DEMO file for the awesome stackoverflow community

# TikzDevice is required to produce .tex files ----
require(plotrix)

# clear the global environment ----
rm(list=ls())

# Assign demo data ----
data_T  <- c(rep(1,5),rep(2,10),rep(3,8),rep(4,2),rep(5,7),rep(6,2),rep(7,13),rep(9,7))
data_P  <- c(rep(1,1),rep(2,4),rep(3,1),rep(4,7),rep(5,12),rep(6,10),rep(7,9),rep(10,2))

# Setting the limits of the result data ----
uxlimit                     <- 10
lxlimit                     <- 0
classes                     <- (uxlimit-lxlimit)
xtics                       <- seq(lxlimit,uxlimit)
uylimit                     <- 20
lylimit                     <- 0
yrange                      <- seq(lylimit,uylimit, by = 5)

# filter out of necessary ----
data_T [ data_T > uxlimit ] <- NaN
data_T [ data_T < lxlimit ] <- NaN
data_T                      <- na.omit(data_T)

# Setting the x-label and y-label according to the requested spectrum ----
xlabel                      <- "x-value / x-unit"
ylabel                      <- "y-value / y-unit"

# generate histogram data ----
data_T_hist <- hist(data_T,
                    breaks = seq(lxlimit,uxlimit,l = classes+1),
                    plot = F)

data_P_hist <- hist(data_P,
                    breaks = seq(lxlimit,uxlimit,l = classes+1),
                    plot = F)

# Plot data_T_hist ----
plot(data_T_hist$breaks,
     c(data_T_hist$counts,0),
     xlab=xlabel,
     ylab=ylabel,
     ylim = c(lylimit,uylimit),
     xlim = c(lxlimit,uxlimit),
     main="Histogram data",
     axes=F,
     type="s",
     col="red",
     lwd=4,
     panel.first = grid(nx=NULL, ny=NULL))

#Get pairwise min
y_low <-c(pmin(data_T_hist$counts, data_P_hist$counts),0)

#Get pairwise max
y_high <- c(pmax(data_T_hist$counts, data_P_hist$counts),0)

for(i in 2:length(xtics)-1){
  if (data_T_hist$counts[i] < data_P_hist$counts[i]) {
    colselect <- "powderblue"
  } else {
    colselect <- "sienna1"
  }
  rect(xtics[i], y_low[i], xtics[i+1],y_high[i], col=colselect, border = NA)
}

# Plot data_P_hist ----
lines(data_P_hist$breaks,
      c(data_P_hist$counts,0),
        xlab=xlabel,
        ylab=ylabel,
        ylim = c(lylimit,uylimit),
        xlim = c(lxlimit, uxlimit),
        type="s",
        col="blue",
        lwd=4,
        lty=2)

# Plot data_P_hist again to keep borders in the background
lines(data_T_hist$breaks,
      c(data_T_hist$counts,0),
      xlab=xlabel,
      ylab=ylabel,
      ylim = c(lylimit,uylimit),
      xlim = c(lxlimit, uxlimit),
      type="s",
      col="red",
      lwd=4,
      lty=2)

# Frame all plots with a solid border ----
box(which = "plot", lty = "solid")

# Add legend to the top right of all plots ----
legend("topright",
       c("data_T_hist", "data_P_hist"),
       col=c("red","blue"),
       bg = "white",
       lwd=4)

# Setting the axes right ----
axis(1, at=xtics, labels=NULL, tick = TRUE)
axis(2, at=yrange, labels=yrange, las=1)

# FINISHED! ----
message ("Finished!")