Calculate p-value from a distinct frequency

46 Views Asked by At

I am looking for the p-value at the frequency close to 90 days in my 365-day timeseries, but the Lomb package only calculates the highest power p-value:

ts = runif(365, 3, 18)
library(lomb)
lsp(ts,to=365,ofac=10,plot=FALSE,type='period')

The root code maybe help us. I have tried to change the values in pbaluev() function but when I modified it to the highest peak data, the output value is different from the p.value generated.

theme_lsp=function (bs=18){
  theme_bw(base_size =bs,base_family="sans")+ theme(plot.margin = unit(c(.5,.5,.5,.5 ), "cm"))+
    theme( axis.text.y =element_text (colour="black", angle=90, hjust=0.5,size=14),
           axis.text.x = element_text(colour="black",size=14),
           axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
           axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0)),
           panel.grid.major = element_blank(), panel.grid.minor = element_blank())
}

lsp <- function (x, times = NULL, from = NULL, to = NULL, type = c("frequency", "period"), ofac = 1, alpha = 0.01, normalize=c("standard","press"), plot = TRUE, ...) {   
  
  type <- match.arg(type)
  normalize<-match.arg(normalize)
  
  
  if (ofac != floor(ofac)) {
    ofac <- floor(ofac)
    warning("ofac coerced to integer")
  }
  if (ofac < 1) {
    ofac <- 1
    warning("ofac must be integer >=1. Set to 1")
  }
  
  
  if (!is.null(times)) {
    if (!is.vector(times)) 
      stop("no multivariate methods available")
    if (length(x) != length(times)) 
      stop("Length of data and times vector must be equal")
    names <- c(deparse(substitute(times)), deparse(substitute(x)))
  }
  
  if (is.null(times) && is.null(ncol(x))) {
    names <- c("Time", deparse(substitute(x)))
    times <- 1:length(x)
  }
  
  if (is.matrix(x) || is.data.frame(x)) {
    
    if (ncol(x) > 2) 
      stop("no multivariate methods available")
    
    if (ncol(x) == 2) {
      names <- colnames(x)
      times <- x[, 1]
      x <- x[, 2]
    }
  }
  
  times <- times[!is.na(x)]
  x <- x[!is.na(x)]
  
  nobs <- length(x)
  if (nobs < 2) 
    stop("time series must have at least two observations")
  
  times <- as.numeric(times)
  start <- min(times)
  end <- max(times)
  av.int <- mean(diff(times))
  
  o <- order(times)
  times <- times[o]
  x <- x[o]
  
  y <- cbind(times, x)
  colnames(y) <- names
  
  datanames <- colnames(y)
  t <- y[, 1]
  y <- y[, 2]
  
  
  n <- length(y)
  tspan <- t[n] - t[1]
  fr.d <- 1/tspan
  step <- 1/(tspan * ofac)
  
  if (type == "period") {
    hold <- from
    from <- to
    to <- hold
    if (!is.null(from)) 
      from <- 1/from
    if (!is.null(to)) 
      to <- 1/to
  }
  
  if (is.null(to)) {
    f.max <- floor(0.5 * n * ofac) * step
  } else {
    f.max <- to
  }
  
  freq <- seq(fr.d, f.max, by = step)
  if (!is.null(from)) 
    freq <- freq[freq >= from]
  n.out <- length(freq)
  if (n.out == 0) 
    stop("erroneous frequency range specified ")
  
  x <- t * 2 * pi
  y <- y - mean(y)
  if (normalize=="standard") {
    norm=1/sum(y^2)
  } else if (normalize=="press"){
    norm <- 1/(2 * var(y))
  } else {
    stop ("normalize must be 'standard' or 'press'")
  }    
  
  w <- 2 * pi * freq
  PN <- rep(0, n.out)
  for (i in 1:n.out) {
    wi <- w[i]
    tau <- 0.5 * atan2(sum(sin(wi * t)), sum(cos(wi * t)))/wi
    arg <- wi * (t - tau)
    cs <- cos(arg)
    sn <- sin(arg)
    A <- (sum(y * cs))^2
    B <- sum(cs * cs)
    C <- (sum(y * sn))^2
    D <- sum(sn * sn)
    PN[i] <- A/B + C/D
  }
  
  PN <- norm * PN
  
  
  PN.max <- max(PN)
  peak.freq <- freq[PN == PN.max]
  if (type == "period") 
    peak.at <- c(1/peak.freq, peak.freq) else peak.at <- c(peak.freq, 1/peak.freq)
  
  scanned <- if (type == "frequency") 
    freq else 1/freq
  if (type == "period") {
    scanned <- scanned[n.out:1]
    PN <- PN[n.out:1]
  }
  if (normalize=="press"){
    effm <- 2 * n.out/ofac
    level <- -log(1 - (1 - alpha)^(1/effm))
    exPN <- exp(-PN.max)
    p <- effm * exPN
    if (p > 0.01)  p <- 1 - (1 - exPN)^effm
  }
  
  if (normalize=="standard"){
    
    
    fmax<-max(freq)
    Z<-PN.max
    tm=t
    
    p<-pbaluev (Z,fmax,tm=t)
    level=fibsearch(levopt,0,1,alpha,fmax=fmax,tm=t)$xmin
    
  }
  
  
  sp.out <- list(normalize=normalize, scanned = scanned, power = PN, data = datanames, n = n,
                 type = type, ofac = ofac, n.out = n.out, alpha = alpha,
                 sig.level = level, peak = PN.max, peak.at = peak.at, p.value = p, fmax = max(freq), Z = PN.max, tm = t,
                 PN = PN)
  
  class(sp.out) <- "lsp"
  
  if (plot) {
    plot(sp.out, ...)
    return(invisible(sp.out))
  } else {
    return(sp.out)}
}


plot.lsp <- function(x, main ="Lomb-Scargle Periodogram", xlabel = NULL, ylabel = "normalized power", level = TRUE, plot=TRUE, ...) {  
  
  if (is.null(xlabel)) 
    xlabel <- x$type
  scn=pow=NULL
  dfp=data.frame(x$scanned,x$power)
  names(dfp)=c("scn","pow")
  p=ggplot(data=dfp,aes(x=scn,y=pow))+geom_line()
  
  if (level == TRUE) {
    if (!is.null(x$sig.level)) {
      
      p=p+geom_hline(yintercept=x$sig.level, linetype="dashed",color="blue")
      p=p+annotate("text",x=max(x$scanned)*0.85,y=x$sig.level*1.05,label=paste("P<",x$alpha),size=6,vjust=0)
    }
  }
  
  p=p+ggtitle(main)
  p=p+ ylab(ylabel)
  p=p+ xlab(xlabel)
  p=p+theme_lsp(20)
  if (plot==T) print(p)
  return (p)
}


summary.lsp <- function(object,...) {
  first <- object$type
  if (first == "frequency") {
    second <- "At period"
  } else {
    second <- "At frequency"
  }
  first <- paste("At ", first)
  from <- min(object$scanned)
  to <- max(object$scanned)
  
  Value <- c(object$data[[1]], object$data[[2]], object$n, object$type, object$ofac, from, to, object$n.out, object$peak, object$peak.at[[1]], object$peak.at[[2]], object$p.value,object$normalize)
  options(warn = -1)
  for (i in 1:length(Value)) {
    if (!is.na(as.numeric(Value[i]))) 
      Value[i] <- format(as.numeric(Value[i]), digits = 5)
  }
  options(warn = 0)
  nmes <- c("Time", "Data", "n", "Type", "Oversampling", "From", "To", "# frequencies", "PNmax", first, second, "P-value (PNmax)", "normalized")
  report <- data.frame(Value, row.names = nmes)
  report
}



randlsp <- function(repeats=1000, x,times = NULL, from = NULL, to = NULL, type = c("frequency", "period"), ofac = 1, alpha = 0.01, plot = TRUE, trace = TRUE, ...) {
  
  if (is.ts(x)){
    x=as.vector(x)
  }
  
  if (!is.vector(x)) {
    times <- x[, 1]
    x <- x[, 2]
  }
  
  
  realres <- lsp(x, times, from, to, type, ofac, alpha,plot = plot, ...)
  realpeak <- realres$peak
  classic.p <-realres$p.value
  pks <- NULL
  if (trace == TRUE) 
    cat("Repeats: ")
  for (i in 1:repeats) {
    randx <- sample(x, length(x))  # scramble data sequence
    randres <- lsp(randx, times, from, to, type, ofac, alpha, plot = F)
    pks <- c(pks, randres$peak)
    if (trace == TRUE) {
      if (i/25 == floor(i/25)) 
        cat(i, " ")
    }
  }
  if (trace == TRUE) 
    cat("\n")
  
  prop <- length(which(pks >= realpeak))
  p.value <- prop/repeats
  p.value=round(p.value,digits=3)
  
  if (plot == TRUE) {
    
    p1=plot(realres,main="LS Periodogram",level=F)
    
    
    dfp=data.frame(pks)
    names(dfp)="peaks"
    p2=ggplot(data=dfp,aes(x=peaks))+geom_histogram(color="black",fill="white")
    p2=p2+geom_vline(aes(xintercept=realpeak),color="blue", linetype="dotted", size=1)
    p2=p2+theme_lsp(20)
    p2=p2+ xlab("peak amplitude")
    p2=p2+ggtitle(paste("P-value= ",p.value))
    
    
    suppressMessages(grid.arrange(p1,p2,nrow=1))
  }
  
  res=realres[-(8:9)]
  res=res[-length(res)]
  res$random.peaks = pks
  res$repeats=repeats
  res$p.value=p.value
  res$classic.p=classic.p
  class(res)="randlsp"
  return(invisible(res))
}

summary.randlsp <- function(object,...) {
  first <- object$type
  if (first == "frequency") {
    second <- "At period"
  } else {
    second <- "At frequency"
  }
  first <- paste("At ", first)
  from <- min(object$scanned)
  to <- max(object$scanned)
  
  Value <- c(object$data[[1]], object$data[[2]], object$n, object$type, object$ofac, from, to, length(object$scanned), object$peak, object$peak.at[[1]], object$peak.at[[2]], object$repeats,object$p.value)
  options(warn = -1)
  for (i in 1:length(Value)) {
    if (!is.na(as.numeric(Value[i]))) 
      Value[i] <- format(as.numeric(Value[i]), digits = 5)
  }
  options(warn = 0)
  nmes <- c("Time", "Data", "n", "Type", "Oversampling", "From", "To", "# frequencies", "PNmax", first, second, "Repeats","P-value (PNmax)")
  report <- data.frame(Value, row.names = nmes)
  report
}

ggamma <- function(N){
  return (sqrt(2 / N) * exp(lgamma(N / 2) - lgamma((N - 1) / 2)))
}

pbaluev <- function(Z,fmax,tm) {
  #code copied from astropy timeseries
  N=length(tm)
  Dt=mean(tm^2)-mean(tm)^2
  NH=N-1
  NK=N-3
  fsingle=(1 - Z) ^ (0.5 * NK)
  Teff = sqrt(4 * pi * Dt) # Effective baseline
  W = fmax * Teff
  tau=ggamma(NH) * W * (1 - Z) ^ (0.5 * (NK - 1))*sqrt(0.5 * NH * Z)
  p=-(exp(-tau)-1) + fsingle * exp(-tau)
  return(p)
}


levopt<-  function(x,alpha,fmax,tm){
  prob=pbaluev(x,fmax,tm)
  (log(prob)-log(alpha))^2
}

pershow=function(object){
  datn=data.frame(period=object$scanned,power=object$power)
  plot_ly(data=datn,type="scatter",mode="lines+markers",linetype="solid",
          x=~period,y=~power)
}


getpeaks=function (object,npeaks=5,plotit=TRUE){
  
  pks=findpeaks(object$power,npeaks=npeaks,minpeakheight=0,sortstr=TRUE) 
  peaks=pks[,1] 
  tmes=object$scanned[pks[,2]]
  tme=round(tmes,2)
  
  p=plot.lsp(object)
  p=p+ylim(0,peaks[1]*1.2)
  for (i in 1:npeaks){
    p=p+annotate("text", label=paste(tme[i]),y=peaks[i],x=tme[i],color="red",angle=45,size=6,vjust=-1,hjust=-.1)
  }
  
  d=data.frame(time=tme,peaks=peaks)
  result=list(data=d,plot=p)
  return(result)
}
0

There are 0 best solutions below