I am trying to aggregate (FUN = mean) the dataset df1 by intervals of df1$depth from the dataset df2 (df2$minDepth & df2$Depth) and that by group (Station and Transect) in order to join them by the column depth, Station and Transect. The problem is the max depth depends of the station, therefore from one group of data to another the interval can be 100-200 or 100-175, which make it quite complex.
e.g. if for df2 the interval from a given station in a given transect is 400-1000, I would like the temperature from df1 being aggregate from depth >400 to <= 1000.
Another possibilities would be to replicate the rows of df2 with a df2$minDepth + 0.5 until reaching df2$maxDepth value and then join by depth.
In both case I do not really know how to proceed.
df1 <- structure(list(Transect = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L), .Label = c("1", "2", "3", "4"), class = "factor"), Station = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("", "1", "2", "3", "4",
"5", "6", "7", "8"), class = "factor"), temperature = c(0.0904,
0.15, 0.7691, 0.0146, -0.3466, 1.9339, -0.0583, 0.208, -0.0755,
1.9752, 2.0051, 1.9778, 1.9825, 3.3611, 2.3119, 0.2727, 0.0849,
2.4664, 2.6563, 1.2256, 1.4938, 1.2118, 3.561, 2.0338, 1.8384,
1.2348, 1.9761, 0.0472, 1.9731, 1.967, 0.0832, 1.9593, 1.6035,
-0.5551, 2.0336, 0.2156, -0.1609, 0.1558, 0.2346, -0.4225, 1.9736,
-0.0274, 1.9396, 1.1732, 2.0147, 1.4887, -0.0349, 1.4741, -0.1417,
1.0949, 2.6406, 1.0877, 1.8169, 2.5692, 2.4662, -0.2685, 1.9774,
1.9705, 2.0462, -0.1153, -0.6441, -0.8629, 1.9899, 1.6423, 1.541,
-0.2768, 2.4161, 1.9753, 2.6955, 0.5983, -0.2354, -0.8856, -0.923,
0.2718, 1.9774, 1.1851, 2.7037, -0.8477, 2.5275, -0.6093, 1.3051,
2.0382, 2.4863, 1.3012, 1.9462, 2.0294, 1.9392, 1.421, 1.9744,
1.9761, -1.0952, 1.5835, 2.6097, -0.3529, 1.9355, 1.9773, -0.743,
2.603, 1.5249, -0.8462, 0.4651, 1.5508, -1.2618, -1.5018, -1.686,
1.3767, 1.7305, 1.4991, 1.5003, 0.8662, 1.6691, -0.2488, 1.3982,
1.6882, -1.6824, -1.4222, 0.9335, -1.5617, 1.3818, 0.2459, 1.7215,
-1.677, -1.0255, 1.3761, 1.1095, 0.5204, 1.7848, -1.5796, 0.424,
-1.5652, 1.3739, 3.9624, 1.5481, 1.1437, -1.5125, -1.6832, 1.4667,
0.6127, 1.475, 1.4825, -0.6471, 1.0726, -1.6796, 1.1914, -1.2973,
-1.6244, 0.6191, 1.1524, 1.6302, -1.4496, 0.7461, 0.6422, 2.7357,
1.0529, 0.6229, 0.8845, 1.0181, -1.5218, 1.0376, -0.1418, 1.005,
0.98, 2.7239, -1.5238, 0.9606, 0.8511, 0.7578, 1.013, 1.0136,
6.0901, 0.2868, -1.5776, -0.2276, 2.6903, -1.3518, 1.0335, 0.7362,
0.4402, 0.8468, 0.2621, -0.0475, 0.9451, -1.4984, 0.7847, 2.598,
0.6331, 0.9527, 0.6488, 0.6407, 0.6129, -1.5152, 0.3581, 1.0337,
0.9938, 0.9142, 1.0187, 0.85, 0.6053, -0.8318, 0.7135), depth = c(161.9,
176.7, 30.5, 66.3, 128.3, 420.5, 61.1, 180.7, 77.1, 934.5, 474.5,
500.5, 768.5, 6.7, 8.5, 187.5, 157.9, 4.5, 32.7, 272.5, 37.9,
40.7, 13.1, 446.5, 406.5, 276.5, 794.5, 59.1, 818.5, 722.5, 152.3,
526.5, 362.5, 117.1, 444.5, 56.5, 95.1, 177.1, 46.5, 87.1, 908.5,
143.1, 666.5, 242.5, 432.5, 338.5, 141.9, 342.5, 79.1, 208.5,
29.1, 204, 408, 37.4, 39, 170.7, 922, 702, 438, 178.6, 146.2,
108.4, 484, 366, 348, 168.7, 44.7, 890, 22.6, 18, 171.9, 126.9,
100.1, 205.7, 938, 264, 23.6, 109.2, 43.9, 147.4, 280, 436, 41.1,
276, 550, 454, 574, 312, 876, 902, 85, 354, 35.6, 165.8, 658,
920, 81.3, 31.5, 320, 132.5, 8.8, 242.1, 101.2, 30.5, 64.4, 321.8,
205.9, 273.1, 270.3, 179, 197.1, 145.2, 304.9, 200.7, 68.4, 94.8,
170.6, 82.4, 317.4, 151.8, 227.7, 66.8, 115.6, 321, 176.3, 67,
213.2, 35.5, 153, 34.7, 323, 25, 246.5, 176.7, 88.8, 64.8, 292.7,
161.8, 287.9, 279.5, 15.9, 53, 59.6, 180.3, 98.4, 70.4, 162.6,
177.9, 195.5, 93.2, 298.2, 238.2, 365.5, 475.4, 223.4, 346.6,
422.9, 78.8, 453.7, 126.6, 395.8, 377.8, 387.5, 75.6, 372.2,
334.6, 307, 405.6, 413.2, 4.2, 143.8, 42.2, 97.5, 333.5, 90.4,
446.1, 294.2, 171.4, 333, 157.5, 129.8, 361.8, 56, 313.8, 299.5,
229.8, 367.8, 240.6, 235, 11.4, 77.6, 159.4, 445.7, 388.2, 357.4,
427.3, 334.2, 211.8, 109.4, 285.4)), row.names = c(NA, -200L), class = c("tbl_df",
"tbl", "data.frame"))
df2 <- structure(list(Transect = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 3L, 3L, 3L, 3L, 3L), .Label = c("1", "2", "3", "4", "6",
"7"), class = "factor"), Station = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 9L, 9L, 9L, 9L, 9L), .Label = c("1", "10", "11",
"12", "14", "16", "17", "18", "2", "20", "22", "23", "24", "3",
"4", "5", "7", "8", "9"), class = "factor"), minDepth = c(100L,
100L, 50L, 50L, 50L, 25L, 25L, 25L, 200L, 100L, 100L, 100L, 100L,
100L, 100L, 50L, 100L, 100L, 100L, 100L, 50L, 50L, 0L, 0L, 0L,
200L, 200L, 200L, 200L, 200L, 200L, 200L, 200L, 50L, 50L, 25L,
25L, 0L, 0L, 0L, 200L, 100L, 50L, 50L, 50L, 25L, 0L, 0L, 50L,
100L, 200L, 200L, 200L, 400L, 400L, 0L, 0L, 100L, 100L, 100L),
maxDepth = c(175L, 175L, 100L, 100L, 100L, 50L, 50L, 50L,
225L, 200L, 200L, 200L, 200L, 200L, 200L, 100L, 200L, 200L,
200L, 200L, 100L, 100L, 25L, 25L, 25L, 300L, 300L, 300L,
300L, 300L, 300L, 300L, 300L, 100L, 100L, 50L, 50L, 25L,
25L, 25L, 480L, 200L, 100L, 100L, 100L, 50L, 25L, 50L, 100L,
200L, 400L, 400L, 400L, 850L, 850L, 50L, 50L, 200L, 200L,
200L), `Average length(µm)` = c(2925, 4233.33333333333,
4367.5, 4367.5, 4367.5, 3360, 3360, 3360, 4404.16666666667,
2497.22222222222, 2497.22222222222, 2497.22222222222, 3277.5,
3277.5, 6593.75, 4525, 2822.5, 2822.5, 2822.5, 4393.75, 1992.5,
1992.5, 2012.5, 2012.5, 2012.5, 2902.5, 2902.5, 2902.5, 3232.5,
3232.5, 6675, 6675, 6675, 4325, 4325, 2064.28571428571, 5391.66666666667,
1925, 1925, 1925, 4162.5, 4216.66666666667, 2432.5, 2432.5,
3475, 2500, 3250, 3283.75, 3962.5, 4329.16666666667, 4412.5,
4412.5, 4412.5, 5252.5, 5252.5, 3550, 6400, 2345, 2345, 2345
)), row.names = c(NA, -60L), class = c("tbl_df", "tbl", "data.frame"
))
I think what you need are non-equi joins to match the depth in
df1with the depth ranges indf2. I can think of 2 approaches which can handle this.The
sqldfpackage allows you to use SQL syntax to manipulate data frames.The
data.tablepackage also allows non-equi joins.