Meteo_pull_monitors for mean climate data by lat/long

161 Views Asked by At

I have a dataframe of latitudes, longitudes, start years and end years. I want mean precipitation for each location for that period.

Right now, I can get this for one location at a time, but I want to automate the following for multiple locations:

Here are some prerequisites:

# Prerequisites (uncomment to run):
# library(xts)
# library(rnoaa)
# options(noaakey = "...")  # https://ropensci.org/blog/2014/03/13/rnoaa/ says how to get an API key
# station_data <- ghcnd_stations()  # Takes a while to run
statenv <- new.env()

# One row per site: coordinates, first and last year of interest, and a site id.
lat_lon_df <- structure(
  list(
    lat = c(41.1620277777778, 44.483333, 44.066667),
    long = c(-96.4115, -92.533333, -93.5),
    yrmin = c(2001L, 1983L, 1982L),
    yrmax = c(2010L, 1990L, 1992L),
    id = c("ithaca", "haycreek", "waseca")
  ),
  class = "data.frame",
  row.names = c(1389L, 1395L, 1403L)
)

And here is the meat.

# Work on the first location only.
ll_df <- lat_lon_df[1, ]

# Find the single closest station (within radius = 50) that reports PRCP for
# the whole period. year_max must be the END of the period (yrmax); passing
# yrmin here would only require the station to cover the first year.
nearby_station <- meteo_nearby_stations(
  lat_lon_df = ll_df,
  lat_colname = "lat",
  lon_colname = "long",
  station_data = station_data,
  radius = 50,
  year_min = ll_df[1, "yrmin"],
  year_max = ll_df[1, "yrmax"],
  limit = 1,
  var = "PRCP"
)

# Get the actual data based on the monitor id of each nearby station.
e <- lapply(nearby_station, function(x) meteo_pull_monitors(x$id[1]))

# Precipitation as a time series, subset to "yrmin/yrmax", summed per year
# and averaged over the years.
ll <- xts(e[[1]]$prcp, order.by = e[[1]]$date)
x <- paste0(ll_df[1, "yrmin"], "/", ll_df[1, "yrmax"])
mean(xts::apply.yearly(na.omit(ll[x]), sum)) / 10 # divide by 10, put in mm

This returns 776.23. End result should be a dataframe that now has a new column "precip" like this:

     lat      long yrmin yrmax       id    precip
41.16203 -96.41150  2001  2010   ithaca    776.23
44.48333 -92.53333  1983  1990 haycreek    829.65
44.06667 -93.50000  1982  1992   waseca    894.62

There has to be a way to get this to simply repeat for each row of lat_lon_df, i.e. for lat_lon_df[1,], then lat_lon_df[2,], and finally lat_lon_df[3,].

1

There is 1 best solution below

0
On BEST ANSWER

One approach would be to apply a custom function over the rows of lat_lon_df.

Here is an example:

library(xts)
library(rnoaa)

Set the API key

#options(noaakey = "...") # https://ropensci.org/blog/2014/03/13/rnoaa/ says how to get an API key

# Meta-information about all available GHCND weather stations. This call takes
# a while to run, so fetch it once and reuse `station_data` in the lookups below.
station_data <- ghcnd_stations()

Now apply all the steps you described within an apply call

# Mean annual precipitation for every row of lat_lon_df.
#
# Rows are visited by index rather than with apply(lat_lon_df, 1, ...):
# apply() first converts the data frame to a character matrix, so the
# coordinates get rounded by format() and the years arrive as strings.
out <- vapply(seq_len(nrow(lat_lon_df)), function(i) {
  site <- lat_lon_df[i, , drop = FALSE]
  min_year <- site$yrmin
  max_year <- site$yrmax
  ll_df <- data.frame(lat = site$lat,
                      long = site$long,
                      id = site$id)

  # Closest single station within the radius that reports PRCP for the period.
  nearby_station <- meteo_nearby_stations(lat_lon_df = ll_df,
                                          lat_colname = "lat",
                                          lon_colname = "long",
                                          station_data = station_data,
                                          radius = 50,
                                          year_min = min_year,
                                          year_max = max_year,
                                          limit = 1,
                                          var = "PRCP")
  station_id <- nearby_station[[1]]$id
  if (length(station_id) == 0L) {
    # No station matched: report a missing value instead of failing on the
    # download step.
    return(NA_real_)
  }

  # Download the station's records and build a precipitation time series.
  monitor <- meteo_pull_monitors(station_id[1])
  ll <- xts(monitor$prcp, order.by = monitor$date)

  # "yrmin/yrmax" subsets the series to the period; sum per year, then
  # average the yearly totals. Dividing by 10 puts the result in mm.
  period <- paste0(min_year, "/", max_year)
  mean(xts::apply.yearly(na.omit(ll[period]), sum)) / 10
}, numeric(1))
# Keep the result named by row name, as apply() over rows would.
names(out) <- rownames(lat_lon_df)

data.frame(lat_lon_df, precip = out)
#output
          lat      long yrmin yrmax       id   precip
1389 41.16203 -96.41150  2001  2010   ithaca 776.2300
1395 44.48333 -92.53333  1983  1990 haycreek 829.6500
1403 44.06667 -93.50000  1982  1992   waseca 894.6273

Do note that when yrmin and yrmax are the same for every location, there is no need to go row by row: a single call to meteo_nearby_stations on the whole lat_lon_df returns the nearby stations for all locations at once.

You can also define this as a named function

# Mean annual precipitation at the station nearest to one location.
#
# Args:
#   x: one row of `lat_lon_df`, read by position: 1 = latitude,
#      2 = longitude, 3 = first year, 4 = last year, 5 = location id.
#      May be the character vector that apply(lat_lon_df, 1, ...) passes in,
#      or a list / one-row data frame with the same layout.
#
# Returns:
#   The mean of the yearly precipitation totals over the period, divided by
#   10 to put it in mm. NA if no station is found for the location.
#
# Reads the global `station_data` (the result of ghcnd_stations()).
get_mean_precip <- function(x) {
  # `[[` works for vectors, lists and one-row data frames alike. The explicit
  # coercions matter because apply() hands every value over as a string.
  min_year <- as.integer(x[[3]])
  max_year <- as.integer(x[[4]])
  ll_df <- data.frame(lat = as.numeric(x[[1]]),
                      long = as.numeric(x[[2]]),
                      id = as.character(x[[5]]),
                      stringsAsFactors = FALSE)

  # Closest single station within the radius that reports PRCP for the period.
  nearby_station <- rnoaa::meteo_nearby_stations(lat_lon_df = ll_df,
                                                 lat_colname = "lat",
                                                 lon_colname = "long",
                                                 station_data = station_data,
                                                 radius = 50,
                                                 year_min = min_year,
                                                 year_max = max_year,
                                                 limit = 1,
                                                 var = "PRCP")
  station_id <- nearby_station[[1]]$id
  if (length(station_id) == 0L) {
    # No station matched: report a missing value instead of failing on the
    # download step.
    return(NA_real_)
  }

  # Download the station's records and build a precipitation time series.
  monitor <- rnoaa::meteo_pull_monitors(station_id[1])
  ll <- xts::xts(monitor$prcp, order.by = monitor$date)

  # "min_year/max_year" subsets the series to the period; sum per year, then
  # average the yearly totals.
  period <- paste0(min_year, "/", max_year)
  mean(xts::apply.yearly(na.omit(ll[period]), sum)) / 10
}

and use it as:

# Iterate over row indices instead of apply(): apply() turns the data frame
# into a character matrix via format(), which rounds the coordinates to 7
# significant digits. unlist() still hands get_mean_precip() a named character
# vector it can index by position, but keeps 15 significant digits.
out <- vapply(
  seq_len(nrow(lat_lon_df)),
  function(i) get_mean_precip(unlist(lat_lon_df[i, ])),
  numeric(1)
)
# Keep the result named by row name, as apply() over rows would.
names(out) <- rownames(lat_lon_df)