How to add columns for animal passage in R

58 Views Asked by At

I am trying to summarize our detection data in a way that I can easily see when an animal moves from one pool to another. Here is an example of one animal that I track

    tibble [22 x 13] (S3: tbl_df/tbl/data.frame)
     $ Receiver     : chr [1:22] "VR2Tx-480679" "VR2Tx-480690" "VR2Tx-480690" "VR2Tx-480690" ...
     $ Transmitter  : chr [1:22] "A69-9001-12418" "A69-9001-12418" "A69-9001-12418" "A69-9001-12418" ...
     $ Species      : chr [1:22] "PDFH" "PDFH" "PDFH" "PDFH" ...
     $ LocalDATETIME: POSIXct[1:22], format: "2021-05-28 07:16:52" ...
     $ StationName  : chr [1:22] "1405U" "1406U" "1406U" "1406U" ...
     $ LengthValue  : num [1:22] 805 805 805 805 805 805 805 805 805 805 ...
     $ WeightValue  : num [1:22] 8.04 8.04 8.04 8.04 8.04 8.04 8.04 8.04 8.04 8.04 ...
     $ Sex          : chr [1:22] "NA" "NA" "NA" "NA" ...
     $ Translocated : num [1:22] 0 0 0 0 0 0 0 0 0 0 ...
     $ Pool         : num [1:22] 16 16 16 16 16 16 16 16 16 16 ...
     $ DeployDate   : POSIXct[1:22], format: "2018-06-05" ...
     $ Latitude     : num [1:22] 41.6 41.6 41.6 41.6 41.6 ...
     $ Longitude    : num [1:22] -90.4 -90.4 -90.4 -90.4 -90.4 ...

I want to add columns that would allow me to summarize this data in a way that I would have the start date of when an animal was in a pool and when the animal moved to a different pool it would have the end date of when it exits.

Ex: Enters Pool 19 on 1/1/22, next detected in Pool 20 on 1/2/22, so there would be columns that say fish entered and exited Penter image description hereool 19 on 1/1/22 and 1/2/22. I have shared an Excel file example of what I am trying to do. I would like to code upstream movement with a 1 and downstream movement with 0.

I have millions of detections and hundreds of animals that I monitor so I am trying to find a way to look at passages for each animal. Thank you!

Here is my dataset using dput:

structure(list(Receiver = c("VR2Tx-480679", "VR2Tx-480690", "VR2Tx-480690", 
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", 
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", 
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480692", "VR2Tx-480695", 
"VR2Tx-480695", "VR2Tx-480713", "VR2Tx-480713", "VR2Tx-480702", 
"VR100", "VR100", "VR100"), Transmitter = c("A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418"), Species = c("PDFH", "PDFH", "PDFH", "PDFH", 
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", 
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", 
"PDFH", "PDFH"), LocalDATETIME = structure(c(1622186212, 1622381700, 
1622384575, 1622184711, 1622381515, 1622381618, 1622381751, 1622381924, 
1622382679, 1622383493, 1622384038, 1622384612, 1622183957, 1622381515, 
1626905954, 1626905688, 1622971975, 1622970684, 1626929618, 1624616880, 
1626084540, 1626954660), tzone = "UTC", class = c("POSIXct", 
"POSIXt")), StationName = c("1405U", "1406U", "1406U", "1406U", 
"1406U", "1406U", "1406U", "1406U", "1406U", "1406U", "1406U", 
"1406U", "1406U", "1404L", "1401D", "1401D", "14Aux2", "14Aux2", 
"15.Mid.Wall", "man_loc", "man_loc", "man_loc"), LengthValue = c(805, 
805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 
805, 805, 805, 805, 805, 805, 805, 805), WeightValue = c(8.04, 
8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 
8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04), 
    Sex = c("NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
    "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
    "NA", "NA", "NA"), Translocated = c(0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Pool = c(16, 
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
    16, 16, 16, 14, 14, 16), DeployDate = structure(c(1528156800, 
    1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
    1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
    1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
    1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
    1528156800), tzone = "UTC", class = c("POSIXct", "POSIXt"
    )), Latitude = c(41.57471, 41.5758, 41.5758, 41.5758, 41.5758, 
    41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 
    41.5758, 41.57463, 41.5731, 41.5731, 41.57469, 41.57469, 
    41.57469, 41.57469, 41.57469, 41.57469), Longitude = c(-90.39944, 
    -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, 
    -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, 
    -90.39984, -90.40391, -90.40391, -90.40462, -90.40462, -90.40462, 
    -90.40462, -90.40462, -90.40462)), row.names = c(NA, -22L
), class = c("tbl_df", "tbl", "data.frame"))
> dput(T12418)
structure(list(Receiver = c("VR2Tx-480679", "VR2Tx-480690", "VR2Tx-480690", 
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", 
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", 
"VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480692", "VR2Tx-480695", 
"VR2Tx-480695", "VR2Tx-480713", "VR2Tx-480713", "VR2Tx-480702", 
"VR100", "VR100", "VR100"), Transmitter = c("A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
"A69-9001-12418"), Species = c("PDFH", "PDFH", "PDFH", "PDFH", 
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", 
"PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", 
"PDFH", "PDFH"), LocalDATETIME = structure(c(1622186212, 1622381700, 
1622384575, 1622184711, 1622381515, 1622381618, 1622381751, 1622381924, 
1622382679, 1622383493, 1622384038, 1622384612, 1622183957, 1622381515, 
1626905954, 1626905688, 1622971975, 1622970684, 1626929618, 1624616880, 
1626084540, 1626954660), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    StationName = c("1405U", "1406U", "1406U", "1406U", "1406U", 
    "1406U", "1406U", "1406U", "1406U", "1406U", "1406U", "1406U", 
    "1406U", "1404L", "1401D", "1401D", "14Aux2", "14Aux2", "15.Mid.Wall", 
    "man_loc", "man_loc", "man_loc"), LengthValue = c(805, 805, 
    805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 
    805, 805, 805, 805, 805, 805, 805, 805), WeightValue = c(8.04, 
    8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 
    8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 
    8.04), Sex = c("NA", "NA", "NA", "NA", "NA", "NA", "NA", 
    "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
    "NA", "NA", "NA", "NA", "NA"), Translocated = c(0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    Pool = c(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
    16, 16, 16, 16, 16, 16, 16, 14, 14, 16), DeployDate = structure(c(1528156800, 
    1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
    1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
    1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
    1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
    1528156800), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    Latitude = c(41.57471, 41.5758, 41.5758, 41.5758, 41.5758, 
    41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 
    41.5758, 41.57463, 41.5731, 41.5731, 41.57469, 41.57469, 
    41.57469, 41.57469, 41.57469, 41.57469), Longitude = c(-90.39944, 
    -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, 
    -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, 
    -90.39984, -90.40391, -90.40391, -90.40462, -90.40462, -90.40462, 
    -90.40462, -90.40462, -90.40462)), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -22L))

Animal 12418 data

1

There are 1 best solutions below

5
On

Here is one possibility for getting the beginning date for entering a pool and ending date for leaving a pool. First, I group by Species (could also add additional grouping variables to distinguish between specimens) and arrange by the time. Then, I look for any changes to the Pool using cumsum. Then, I pull the first date recorded for the pool as the the date that they entered the pool. Then, I do some grouping and ungrouping to grab the date from the next group (i.e., the date the species left the pool) and then copy that date for the whole group. For determining upstream/downstream, we can use case_when inside of mutate. I'm also assuming that you want this to match the date, so I have filled in the values for each group with the movement for pool change.

library(tidyverse)
  
df_dates <- df %>%
  group_by(Species, Transmitter) %>%
  arrange(Species, Transmitter, LocalDATETIME) %>%
  mutate(changeGroup = cumsum(Pool != lag(Pool, default = -1))) %>%
  group_by(Species, Transmitter, changeGroup) %>%
  mutate(EnterPool = first(format(as.Date(LocalDATETIME), "%m/%d/%Y"))) %>%
  ungroup(changeGroup) %>%
  mutate(LeftPool = lead(EnterPool)) %>%
  group_by(Species, Transmitter, changeGroup) %>%
  mutate(LeftPool = last(LeftPool)) %>% 
  ungroup(changeGroup) %>% 
  mutate(stream = case_when((Pool - lag(Pool)) > 0 ~ 0,
                            (Pool - lag(Pool)) < 0 ~ 1)) %>% 
  fill(stream, .direction = "down")

Output

print(as_tibble(df_dates[1:24, c(1:5, 10:17)]), n=24)

# A tibble: 24 × 13
   Receiver     Transmitter    Species LocalDATETIME       StationName  Pool DeployDate          Latitude Longitude changeGroup EnterPool  LeftPool   stream
   <chr>        <chr>          <chr>   <dttm>              <chr>       <dbl> <dttm>                 <dbl>     <dbl>       <int> <chr>      <chr>       <dbl>
 1 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-28 06:39:17 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
 2 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-28 06:51:51 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
 3 VR2Tx-480679 A69-9001-12418 PDFH    2021-05-28 07:16:52 1405U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
 4 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 13:31:55 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
 5 VR2Tx-480692 A69-9001-12418 PDFH    2021-05-30 13:31:55 1404L          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
 6 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 13:33:38 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
 7 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 13:35:00 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
 8 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 13:35:51 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
 9 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 13:38:44 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
10 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 13:51:19 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
11 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 14:04:53 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
12 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 14:13:58 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
13 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 14:22:55 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
14 VR2Tx-480690 A69-9001-12418 PDFH    2021-05-30 14:23:32 1406U          16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
15 VR2Tx-480713 A69-9001-12418 PDFH    2021-06-06 09:11:24 14Aux2         16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
16 VR2Tx-480713 A69-9001-12418 PDFH    2021-06-06 09:32:55 14Aux2         16 2018-06-05 00:00:00     41.6     -90.4           1 05/28/2021 06/25/2021     NA
17 VR100        A69-9001-12418 PDFH    2021-06-25 10:28:00 man_loc        14 2018-06-05 00:00:00     41.6     -90.4           2 06/25/2021 07/21/2021      1
18 VR100        A69-9001-12418 PDFH    2021-07-12 10:09:00 man_loc        14 2018-06-05 00:00:00     41.6     -90.4           2 06/25/2021 07/21/2021      1
19 VR2Tx-480695 A69-9001-12418 PDFH    2021-07-21 22:14:48 1401D          16 2018-06-05 00:00:00     41.6     -90.4           3 07/21/2021 NA              0
20 VR2Tx-480695 A69-9001-12418 PDFH    2021-07-21 22:19:14 1401D          16 2018-06-05 00:00:00     41.6     -90.4           3 07/21/2021 NA              0
21 VR2Tx-480702 A69-9001-12418 PDFH    2021-07-22 04:53:38 15.Mid.Wall    16 2018-06-05 00:00:00     41.6     -90.4           3 07/21/2021 NA              0
22 VR100        A69-9001-12418 PDFH    2021-07-22 11:51:00 man_loc        16 2018-06-05 00:00:00     41.6     -90.4           3 07/21/2021 NA              0
23 AR100        B80-9001-12420 PDFH    2021-07-22 11:51:00 man_loc        19 2018-06-05 00:00:00     42.6     -90.4           1 07/22/2021 07/22/2021     NA
24 AR100        B80-9001-12420 PDFH    2021-07-22 11:51:01 man_loc        18 2018-06-05 00:00:00     42.6     -90.4           2 07/22/2021 NA              1

Data

df <- structure(list(Receiver = c("VR2Tx-480679", "VR2Tx-480690", "VR2Tx-480690", 
                            "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", 
                            "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480690", 
                            "VR2Tx-480690", "VR2Tx-480690", "VR2Tx-480692", "VR2Tx-480695", 
                            "VR2Tx-480695", "VR2Tx-480713", "VR2Tx-480713", "VR2Tx-480702", 
                            "VR100", "VR100", "VR100", "AR100", "AR100"), Transmitter = c("A69-9001-12418", 
                                                                        "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
                                                                        "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
                                                                        "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
                                                                        "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
                                                                        "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", "A69-9001-12418", 
                                                                        "A69-9001-12418", "B80-9001-12420", "B80-9001-12420"), Species = c("PDFH", "PDFH", "PDFH", "PDFH", 
                                                                                                       "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", 
                                                                                                       "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", "PDFH", 
                                                                                                       "PDFH", "PDFH", "PDFH", "PDFH"), LocalDATETIME = structure(c(1622186212, 1622381700, 
                                                                                                                                                    1622384575, 1622184711, 1622381515, 1622381618, 1622381751, 1622381924, 
                                                                                                                                                    1622382679, 1622383493, 1622384038, 1622384612, 1622183957, 1622381515, 
                                                                                                                                                    1626905954, 1626905688, 1622971975, 1622970684, 1626929618, 1624616880, 
                                                                                                                                                    1626084540, 1626954660, 1626954661, 1626954660), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
               StationName = c("1405U", "1406U", "1406U", "1406U", "1406U", 
                               "1406U", "1406U", "1406U", "1406U", "1406U", "1406U", "1406U", 
                               "1406U", "1404L", "1401D", "1401D", "14Aux2", "14Aux2", "15.Mid.Wall", 
                               "man_loc", "man_loc", "man_loc", "man_loc", "man_loc"), LengthValue = c(805, 805, 
                                                                                 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 805, 
                                                                                 805, 805, 805, 805, 805, 805, 805, 805, 805, 805), WeightValue = c(8.04, 
                                                                                                                                          8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 
                                                                                                                                          8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 8.04, 
                                                                                                                                          8.04, 8.04, 8.04), Sex = c("NA", "NA", "NA", "NA", "NA", "NA", "NA", 
                                                                                                                                                         "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
                                                                                                                                                         "NA", "NA", "NA", "NA", "NA", "NA", "NA"), Translocated = c(0, 0, 0, 
                                                                                                                                                                                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
               Pool = c(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
                        16, 16, 16, 16, 16, 16, 16, 14, 14, 16, 18, 19), DeployDate = structure(c(1528156800, 
                                                                                          1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
                                                                                          1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
                                                                                          1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
                                                                                          1528156800, 1528156800, 1528156800, 1528156800, 1528156800, 
                                                                                          1528156800, 1528156800, 1528156800), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
               Latitude = c(41.57471, 41.5758, 41.5758, 41.5758, 41.5758, 
                            41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 41.5758, 
                            41.5758, 41.57463, 41.5731, 41.5731, 41.57469, 41.57469, 
                            41.57469, 41.57469, 41.57469, 41.57469, 42.57469, 42.57469), Longitude = c(-90.39944, 
                                                                                   -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, 
                                                                                   -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, -90.39793, 
                                                                                   -90.39984, -90.40391, -90.40391, -90.40462, -90.40462, -90.40462, 
                                                                                   -90.40462, -90.40462, -90.40462, -90.40470, -90.40470)), class = c("tbl_df", "tbl", 
                                                                                                                                "data.frame"), row.names = c(NA, -24L))