Lead and lag multiple values together in same new column

87 Views Asked by At

Here is my data:

structure(list(date = structure(c(19662, 19663, 19664, 19665, 
19666), class = "Date"), tmax = c(12, 13, 12, 11, 15)), class = "data.frame", row.names = c(NA, 
-5L))

        date tmax
2023-11-01   12
2023-11-02   13
2023-11-03   12
2023-11-04   11
2023-11-05   15

What is want to achieve is the following:

        date tmax tmax_climate
2023-11-01   12           NA
2023-11-01   12           NA
2023-11-01   12           12
2023-11-01   12           13
2023-11-01   12           12
2023-11-02   13           NA
2023-11-02   13           12
2023-11-02   13           13
2023-11-02   13           12
2023-11-02   13           11
2023-11-03   12           12
2023-11-03   12           13
2023-11-03   12           12
2023-11-03   12           11
2023-11-03   12           15
2023-11-04   11           13
2023-11-04   11           12
2023-11-04   11           11
2023-11-04   11           15
2023-11-04   11           NA
2023-11-05   15           12
2023-11-05   15           11
2023-11-05   15           15
2023-11-05   15           NA
2023-11-05   15           NA

Basically what I want is to get tmax from two previous days and tmax for two next days. I've tried rollapply(), lead() and lag() but with no luck so far. Preferably I would like to stick to dplyr. Order doesn't matter in tmax_climate (no need to be tmax-1, tmax, tmax+1 as in desired output)

3

There are 3 best solutions below

0
On BEST ANSWER
library(dplyr)
df |>
  reframe(ref_date = seq.Date(date-2, date+2, "day"), .by = c(date, tmax)) |>
  left_join(df |> rename(climate = tmax), join_by(ref_date == date))

Result

         date tmax   ref_date climate
1  2023-11-01   12 2023-10-30      NA
2  2023-11-01   12 2023-10-31      NA
3  2023-11-01   12 2023-11-01      12
4  2023-11-01   12 2023-11-02      13
5  2023-11-01   12 2023-11-03      12
6  2023-11-02   13 2023-10-31      NA
7  2023-11-02   13 2023-11-01      12
8  2023-11-02   13 2023-11-02      13
9  2023-11-02   13 2023-11-03      12
10 2023-11-02   13 2023-11-04      11
11 2023-11-03   12 2023-11-01      12
12 2023-11-03   12 2023-11-02      13
13 2023-11-03   12 2023-11-03      12
14 2023-11-03   12 2023-11-04      11
15 2023-11-03   12 2023-11-05      15
16 2023-11-04   11 2023-11-02      13
17 2023-11-04   11 2023-11-03      12
18 2023-11-04   11 2023-11-04      11
19 2023-11-04   11 2023-11-05      15
20 2023-11-04   11 2023-11-06      NA
21 2023-11-05   15 2023-11-03      12
22 2023-11-05   15 2023-11-04      11
23 2023-11-05   15 2023-11-05      15
24 2023-11-05   15 2023-11-06      NA
25 2023-11-05   15 2023-11-07      NA
1
On

One dplyr option:

library(dplyr)
library(tidyr)

df %>%
  mutate(lag1 = lag(tmax),
         lead1 = lead(tmax),
         current = tmax) %>%
  pivot_longer(cols = c(lag1, lead1, current),
               values_to = 'tmax_climate') %>%
  select(-name)
#> # A tibble: 15 × 3
#>    date        tmax tmax_climate
#>    <date>     <dbl>        <dbl>
#>  1 2023-11-01    12           NA
#>  2 2023-11-01    12           13
#>  3 2023-11-01    12           12
#>  4 2023-11-02    13           12
#>  5 2023-11-02    13           12
#>  6 2023-11-02    13           13
#>  7 2023-11-03    12           13
#>  8 2023-11-03    12           11
#>  9 2023-11-03    12           12
#> 10 2023-11-04    11           12
#> 11 2023-11-04    11           15
#> 12 2023-11-04    11           11
#> 13 2023-11-05    15           11
#> 14 2023-11-05    15           NA
#> 15 2023-11-05    15           15
0
On

in Base R, use the following:

n <- 2
m <- length(df$tmax)
x <- c(rep(NA, n), df$tmax, rep(NA, n))
data.frame(df[rep(seq(m), each=m),], 
                     tmax_climate = x[sequence(rep(m,m), seq(m))],
                     row.names = NULL)

         date tmax tmax_climate
1  2023-11-01   12           NA
2  2023-11-01   12           NA
3  2023-11-01   12           12
4  2023-11-01   12           13
5  2023-11-01   12           12
6  2023-11-02   13           NA
7  2023-11-02   13           12
8  2023-11-02   13           13
9  2023-11-02   13           12
10 2023-11-02   13           11
11 2023-11-03   12           12
12 2023-11-03   12           13
13 2023-11-03   12           12
14 2023-11-03   12           11
15 2023-11-03   12           15
16 2023-11-04   11           13
17 2023-11-04   11           12
18 2023-11-04   11           11
19 2023-11-04   11           15
20 2023-11-04   11           NA
21 2023-11-05   15           12
22 2023-11-05   15           11
23 2023-11-05   15           15
24 2023-11-05   15           NA
25 2023-11-05   15           NA

Another way is to use embed once you have m and x above:

c(embed(x, m)[, m:1])
 [1] NA NA 12 13 12 NA 12 13 12 11 12 13 12 11 15 13 12 11 15 NA 12 11 15 NA NA