For loop for centroid function (geosphere package)

79 Views Asked by At

I'm trying to create a for loop to return the centroid positions from a data frame. Basically I've got 451 unique tracking ID's in this dataframe, each with about 125 observations of longitude and latitude. I want to create a dataframe that gives me the centroid lon and lat for all 451 ID's. The approach I'm using is creating an empty data frame then running a for loop to store all the results in that dataframe.

Here's what I've written:

forage.central <- as.tibble(data.frame(matrix(ncol = 2, nrow = 451)))
for(i in unique(tow_dat$track_ID)){
  current_data <- tow_dat %>%  filter(track_ID == i) %>%
    dplyr::select(lon,lat)

  print(centroid(current_data))
  forage.central[i] <- centroid(current_data)
}

And I'm getting this output:

      lon       lat
[1,] 151.9351 -27.58116
Error in `[<-`:
! Can't recycle input of size 2 to size 1.
Run `rlang::last_trace()` to see where the error occurred.

Any suggestions?

1

There are 1 best solutions below

1
margusl On

Despite specifically asking for for-loops and geospeher, I'd suggest using sf instead, current de facto R library for spatial (vector) data. With sf your problem automagically turns into a simple group_by() / summarise() task:

library(dplyr)
library(readr)
library(sf)
#> Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
library(mapview)

##  import data
# exsmple dataset from https://www.movebank.org/cms/webapp?gwt_fragment=page=studies,path=study1917386905
tracks <- read_csv("Reindeer movement in East Iceland.csv") %>% 
  select(lon = `location-long`, lat = `location-lat`, track_id = `individual-local-identifier`)

glimpse(tracks)
#> Rows: 21,346
#> Columns: 3
#> $ lon      <dbl> -15.50317, -15.49686, -15.50248, -15.50913, -15.51636, -15.51…
#> $ lat      <dbl> 65.19598, 65.16754, 65.16415, 65.16519, 65.16563, 65.16222, 6…
#> $ track_id <chr> "ANA", "ANA", "ANA", "ANA", "ANA", "ANA", "ANA", "ANA", "ANA"…

# dataframe to sf object, with crs we define 
# which coordinate reference system is used for input
tracks_sf <- tracks %>% 
  st_as_sf(coords = c("lon", "lat"), crs = "WGS84")
print(tracks_sf, n = 5)
#> Simple feature collection with 21346 features and 1 field
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -16.13069 ymin: 64.67822 xmax: -14.31203 ymax: 65.88164
#> Geodetic CRS:  WGS 84
#> # A tibble: 21,346 × 2
#>   track_id             geometry
#> * <chr>             <POINT [°]>
#> 1 ANA      (-15.50317 65.19598)
#> 2 ANA      (-15.49686 65.16754)
#> 3 ANA      (-15.50248 65.16415)
#> 4 ANA      (-15.50913 65.16519)
#> 5 ANA      (-15.51636 65.16563)
#> # ℹ 21,341 more rows

# 21346 track poins to 8 centroids 
centroids_wgs84 <- tracks_sf %>%
  # summarise by track_id, 
  # 21346 POINTs to 8 MULTIPOINTs
  group_by(track_id) %>% 
  summarise() %>% 
  # centroid of each MULTIPOINT
  st_centroid()

centroids_wgs84
#> Simple feature collection with 8 features and 1 field
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: -15.5509 ymin: 64.79553 xmax: -14.70425 ymax: 65.31308
#> Geodetic CRS:  WGS 84
#> # A tibble: 8 × 2
#>   track_id             geometry
#> * <chr>             <POINT [°]>
#> 1 ANA      (-15.44893 65.30635)
#> 2 ASA      (-15.04011 65.08037)
#> 3 AXA      (-14.71519 64.81798)
#> 4 GRIMA    (-15.19988 65.00439)
#> 5 HAUGA    (-14.70425 64.79553)
#> 6 HEIDA    (-15.19022 64.94209)
#> 7 HLIDAR    (-15.5509 65.31308)
#> 8 HNEFLA   (-14.83611 64.85308)

# visualize
mapview(tracks_sf, zcol = "track_id", cex = 2, 
        lwd = 0, alpha.regions = .02, legend = FALSE) +
mapview(centroids_wgs84, layer.name = "track_id", zcol = "track_id", cex = 6, lwd = 2)

# back to a regular data.frame / tibble with lon & lat columns
bind_cols(
  st_drop_geometry(centroids_wgs84),
  st_coordinates(centroids_wgs84)
) %>% 
  rename(lon = X, lat = Y)
#> # A tibble: 8 × 3
#>   track_id   lon   lat
#>   <chr>    <dbl> <dbl>
#> 1 ANA      -15.4  65.3
#> 2 ASA      -15.0  65.1
#> 3 AXA      -14.7  64.8
#> 4 GRIMA    -15.2  65.0
#> 5 HAUGA    -14.7  64.8
#> 6 HEIDA    -15.2  64.9
#> 7 HLIDAR   -15.6  65.3
#> 8 HNEFLA   -14.8  64.9

As a side note, depending on collected track points you may want to work with some other metrics like centroids of (convex) hulls. My selected dataset isn't the best example for this, but the difference between methods is still clearly visible:

library(ggplot2)
# find centroids of points and centroids of convex hulls for each track
tracks_sf %>% 
  group_by(track_id) %>% 
  summarise() %>% 
  mutate(convex_hull = st_convex_hull(geometry),
         points_cntr = st_centroid(geometry),
         convex_cntr = st_centroid(convex_hull)) %>% 
  # plot by track_id
  ggplot() +
  geom_sf(aes(geometry = convex_hull), fill = "grey", alpha = .2) +
  geom_sf(aes(geometry = geometry), pch = 20, alpha = .2, color = "grey60", size = .2) +
  geom_sf(aes(geometry = points_cntr, color = "point cntr"), size = 2) + 
  geom_sf(aes(geometry = convex_cntr, color = "hull cntr"),  size = 2) +
  scale_color_viridis_d(name = "Centroid method") +
  facet_wrap(~ track_id, nrow = 2) +
  theme_minimal() +
  theme(legend.position = "bottom")

Created on 2023-07-28 with reprex v2.0.2

You may also want to skim through following CRAN task views:
https://cran.r-project.org/web/views/Tracking.html
https://cran.r-project.org/web/views/SpatioTemporal.html

There are quite a few libraries targeting trajectory analysis and more often than not someone has already solved and tested a solution for that new-to-you problem.