` set.seed(500)
index <- sample(1:nrow(Bands_reflectance_2017),100, replace = FALSE )
Bands_reflectance_2017 <- dput(head(Bands_reflectance_2017[1:100]))
Bands_reflectance_2017 <- structure( list( t2017.01.05T08.25.12.000000000_blue = c(5064L, 5096L, 5072L, 5048L, 5048L, 5064L), t2017.01.15T08.26.22.000000000_blue = c(418L, 487L, 480L, 449L, 449L, 480L), t2017.01.25T08.21.38.000000000_blue = c(312L, 414L, 385L, 385L, 385L, 403L), t2017.02.04T08.27.09.000000000_blue = c(5156L, 5096L, 5204L, 5240L, 5240L, 5112L), t2017.02.14T08.27.29.000000000_blue = c(2554L, 2896L, 2842L, 2776L, 2776L, 2934L), t2017.02.24T08.23.38.000000000_blue = c(2662L, 2428L, 2630L, 2644L, 2644L, 2276L), t2017.03.06T08.24.47.000000000_blue = c(340L, 403L, 409L, 407L, 407L, 391L), t2017.03.16T08.16.07.000000000_blue = c(188L, 245L, 257L, 239L, 239L, 245L), t2017.03.26T08.22.43.000000000_blue = c(379L, 397L, 381L, 345L, 345L, 387L), t2017.04.05T08.23.06.000000000_blue = c(604L, 647L, 639L, 647L, 647L, 631L), t2017.04.15T08.23.45.000000000_blue = c(311L, 382L, 376L, 379L, 379L, 425L), t2017.04.25T08.23.17.000000000_blue = c(219L, 318L, 237L, 322L, 322L, 302L), t2017.05.05T08.23.45.000000000_blue = c(979L, 1030L, 1021L, 1030L, 1030L, 985L), t2017.05.15T08.28.11.000000000_blue = c(138L, 219L, 196L, 201L, 201L, 247L), t2017.05.25T08.23.46.000000000_blue = c(655L, 779L, 736L, 752L, 752L, 777L), t2017.06.04T08.25.50.000000000_blue = c(318L, 419L, 384L, 343L, 343L, 400L), t2017.06.14T08.28.06.000000000_blue = c(397L, 387L, 407L, 432L, 432L, 347L), t2017.06.24T08.26.00.000000000_blue = c(336L, 450L, 402L, 395L, 395L, 388L), t2017.07.04T08.23.42.000000000_blue = c(502L, 538L, 512L, 495L, 495L, 505L), t2017.07.09T08.23.09.000000000_blue = c(568L, 597L, 639L, 611L, 611L, 577L), t2017.07.19T08.23.43.000000000_blue = c(479L, 517L, 536L, 529L, 529L, 528L), t2017.07.24T08.23.44.000000000_blue = c(409L, 499L, 499L, 473L, 473L, 482L), t2017.07.29T08.26.12.000000000_blue = c(781L, 801L, 810L, 823L, 823L, 735L), t2017.08.03T08.26.43.000000000_blue = c(517L, 579L, 560L, 583L, 583L, 564L), t2017.08.08T08.23.41.000000000_blue = c(575L, 654L, 650L, 650L, 650L, 602L), t2017.08.13T08.23.44.000000000_blue = c(623L, 679L, 708L, 698L, 698L, 677L), t2017.08.18T08.25.16.000000000_blue = c(614L, 651L, 648L, 597L, 597L, 651L), t2017.08.23T08.22.22.000000000_blue = c(554L, 613L, 559L, 524L, 524L, 596L), t2017.08.28T08.28.01.000000000_blue = c(769L, 814L, 772L, 744L, 744L, 828L), t2017.09.02T08.23.42.000000000_blue = c(756L, 761L, 763L, 783L, 783L, 742L), t2017.09.07T08.23.30.000000000_blue = c(807L, 865L, 826L, 838L, 838L, 837L), t2017.09.12T08.23.35.000000000_blue = c(861L, 869L, 876L, 904L, 904L, 869L), t2017.09.22T08.23.38.000000000_blue = c(4640L, 3780L, 4340L, 4728L, 4728L, 3060L), t2017.09.27T08.16.41.000000000_blue = c(778L, 777L, 811L, 839L, 839L, 752L), t2017.10.02T08.17.41.000000000_blue = c(766L, 868L, 851L, 857L, 857L, 799L), t2017.10.07T08.24.51.000000000_blue = c(767L, 816L, 839L, 830L, 830L, 753L), t2017.10.12T08.24.39.000000000_blue = c(678L, 688L, 706L, 750L, 750L, 627L), t2017.10.17T08.15.32.000000000_blue = c(678L, 769L, 804L, 797L, 797L, 711L), t2017.10.22T08.21.34.000000000_blue = c(3146L, 3134L, 3128L, 3160L, 3160L, 3118L), t2017.10.27T08.23.27.000000000_blue = c(612L, 697L, 721L, 697L, 697L, 708L), t2017.11.01T08.24.41.000000000_blue = c(941L, 982L, 1001L, 1010L, 1010L, 999L), t2017.11.06T08.20.50.000000000_blue = c(670L, 824L, 836L, 824L, 824L, 785L), t2017.11.11T08.27.40.000000000_blue = c(720L, 817L, 839L, 807L, 807L, 801L), t2017.11.16T08.16.16.000000000_blue = c(9824L, 9744L, 9792L, 9744L, 9744L, 9536L), t2017.11.21T08.17.00.000000000_blue = c(749L, 841L, 838L, 738L, 738L, 830L), t2017.11.26T08.25.13.000000000_blue = c(735L, 863L, 832L, 713L, 713L, 899L), t2017.12.01T08.20.22.000000000_blue = c(674L, 836L, 816L, 800L, 800L, 771L), t2017.12.06T08.19.42.000000000_blue = c(2742L, 2770L, 2742L, 2762L, 2762L, 2798L), t2017.12.11T08.19.00.000000000_blue = c(582L, 745L, 734L, 654L, 654L, 743L), t2017.12.16T08.23.19.000000000_blue = c(926L, 1054L, 1001L, 946L, 946L, 1054L), t2017.12.21T08.20.53.000000000_blue = c(7432L, 7484L, 7456L, 7404L, 7404L, 7484L), t2017.12.26T08.20.39.000000000_blue = c(629L, 724L, 762L, 738L, 738L, 731L), t2017.12.31T08.20.04.000000000_blue = c(667L, 765L, 762L, 718L, 718L, 765L), t2017.01.05T08.25.12.000000000_green = c(5224L, 5196L, 5208L, 5152L, 5152L, 5172L), t2017.01.15T08.26.22.000000000_green = c(837L, 938L, 907L, 858L, 858L, 927L), t2017.01.25T08.21.38.000000000_green = c(735L, 808L, 770L, 770L, 770L, 836L), t2017.02.04T08.27.09.000000000_green = c(5424L, 5492L, 5488L, 5536L, 5536L, 5832L), t2017.02.14T08.27.29.000000000_green = c(3050L, 3094L, 3108L, 3228L, 3228L, 2900L), t2017.02.24T08.23.38.000000000_green = c(2664L, 2450L, 2598L, 2646L, 2646L, 2340L), t2017.03.06T08.24.47.000000000_green = c(702L, 735L, 749L, 727L, 727L, 729L), t2017.03.16T08.16.07.000000000_green = c(632L, 685L, 708L, 685L, 685L, 703L), t2017.03.26T08.22.43.000000000_green = c(744L, 841L, 806L, 809L, 809L, 818L), t2017.04.05T08.23.06.000000000_green = c(1030L, 1036L, 1044L, 1050L, 1050L, 1040L), t2017.04.15T08.23.45.000000000_green = c(634L, 720L, 708L, 699L, 699L, 751L), t2017.04.25T08.23.17.000000000_green = c(619L, 698L, 716L, 723L, 723L, 687L), t2017.05.05T08.23.45.000000000_green = c(1340L, 1368L, 1374L, 1404L, 1404L, 1354L), t2017.05.15T08.28.11.000000000_green = c(525L, 633L, 619L, 612L, 612L, 626L), t2017.05.25T08.23.46.000000000_green = c(1042L, 1118L, 1078L, 1028L, 1028L, 1148L), t2017.06.04T08.25.50.000000000_green = c(655L, 778L, 783L, 769L, 769L, 813L), t2017.06.14T08.28.06.000000000_green = c(772L, 829L, 838L, 810L, 810L, 822L), t2017.06.24T08.26.00.000000000_green = c(741L, 888L, 848L, 798L, 798L, 865L), t2017.07.04T08.23.42.000000000_green = c(867L, 918L, 912L, 846L, 846L, 946L), t2017.07.09T08.23.09.000000000_green = c(936L, 1001L, 1012L, 972L, 972L, 985L), t2017.07.19T08.23.43.000000000_green = c(848L, 911L, 925L, 915L, 915L, 903L), t2017.07.24T08.23.44.000000000_green = c(855L, 907L, 947L, 913L, 913L, 937L), t2017.07.29T08.26.12.000000000_green = c(1096L, 1106L, 1134L, 1150L, 1150L, 1116L), t2017.08.03T08.26.43.000000000_green = c(987L, 1072L, 1040L, 1030L, 1030L, 1021L), t2017.08.08T08.23.41.000000000_green = c(996L, 1011L, 1001L, 1011L, 1011L, 1032L), t2017.08.13T08.23.44.000000000_green = c(1006L, 1100L, 1082L, 1078L, 1078L, 1092L), t2017.08.18T08.25.16.000000000_green = c(977L, 1034L, 1032L, 976L, 976L, 1020L), t2017.08.23T08.22.22.000000000_green = c(976L, 1054L, 1044L, 985L, 985L, 1072L), t2017.08.28T08.28.01.000000000_green = c(1162L, 1176L, 1188L, 1150L, 1150L, 1200L), t2017.09.02T08.23.42.000000000_green = c(1136L, 1152L, 1158L, 1176L, 1176L, 1130L), t2017.09.07T08.23.30.000000000_green = c(1122L, 1166L, 1174L, 1194L, 1194L, 1162L), t2017.09.12T08.23.35.000000000_green = c(1158L, 1170L, 1168L, 1180L, 1180L, 1146L), t2017.09.22T08.23.38.000000000_green = c(3304L, 3218L, 3072L, 3580L, 3580L, 4148L), t2017.09.27T08.16.41.000000000_green = c(1172L, 1228L, 1242L, 1224L, 1224L, 1172L), t2017.10.02T08.17.41.000000000_green = c(1148L, 1224L, 1220L, 1200L, 1200L, 1164L), t2017.10.07T08.24.51.000000000_green = c(1120L, 1164L, 1160L, 1148L, 1148L, 1114L), t2017.10.12T08.24.39.000000000_green = c(1124L, 1158L, 1166L, 1144L, 1144L, 1090L), t2017.10.17T08.15.32.000000000_green = c(1092L, 1190L, 1180L, 1154L, 1154L, 1146L), t2017.10.22T08.21.34.000000000_green = c(3140L, 3124L, 3142L, 3134L, 3134L, 3096L), t2017.10.27T08.23.27.000000000_green = c(1064L, 1104L, 1116L, 1078L, 1078L, 1098L), t2017.11.01T08.24.41.000000000_green = c(1298L, 1310L, 1344L, 1344L, 1344L, 1318L), t2017.11.06T08.20.50.000000000_green = c(1114L, 1240L, 1220L, 1164L, 1164L, 1212L), t2017.11.11T08.27.40.000000000_green = c(1182L,1278L, 1278L, 1192L, 1192L, 1284L), t2017.11.16T08.16.16.000000000_green = c(8872L, 8728L, 8816L, 8904L, 8904L, 8600L), t2017.11.21T08.17.00.000000000_green = c(1166L, 1268L, 1250L, 1158L, 1158L, 1260L), t2017.11.26T08.25.13.000000000_green = c(1138L, 1272L, 1288L, 1240L, 1240L, 1278L)), row.names = c(NA, 6L), class = "data.frame") `
I have a dataframe of dates for per specific bands with 534 column headers as follow:
"t2017-12-31T08:20:04.000000000_red_edge_3"
"t2017-02-04T08:27:09.000000000_nir_1"
"t2017-12-31T08:20:04.000000000_swir_2"
Now, I want to remove everything and only remain with the date and the band name e.g in column header one and two, I want to only remain with
"2017-12-31_red_edge_3"
"2017-02-04_nir_1"
I have about 534 columns and most characters are not consistent because each date time is different and more band examples not similar to what is shown here for all the 534 records, so I was only able to remove repetitive characters such as "T08", ":","t" and "000000000" which are available in all the columns. How do I remove the values between the date and the band characters when they vary per each column and so I cannot use :
for ( col in 1:ncol(Bands_reflectance_2017[5:534])){
colnames(Bands_reflectance_2017)[5:534] <- sub(".000000000", "", colnames(Bands_reflectance_2017)[5:534]) #Remove .000000000
}
etc
Also at the end of the day, I want to replace each bandname with a band coding system such as assign "nir-1" as "B8" and "12" as the month of "December" so that for example my first and second column header reads:
B7_December31 | B8_February02 |
---|---|
Cell 1 | Cell 2 |
Cell 3 | Cell 4 |
"B7_December31", "B8_February02" which are better naming to run in a random forest. Because I am running into problems of
Error in eval(predvars, data, env) : object '"t2017-12-31T08:20:04.000000000_red_edge_3"' not found
if I keep the naming convention in the example
I have the following column header names in my dataframe (Bands_reflectance_2017) of 534 columns :
"t2017-01-25T08:21:38.000000000_blue" | "t2017-08-23T08:22:22.000000000_green" |
---|---|
Cell 1 | Cell 2 |
Cell 3 | Cell 4 |
I want to remove everything except the date and band name e.g "2017_01_25_blue"
I tried:
for ( col in 1:ncol(Bands_reflectance_2017[5:534])){
colnames(Bands_reflectance_2017)[5:534] <- sub("T08", "", colnames(Bands_reflectance_2017)[5:534]) #Remove T08
But as some of the characters I want to remove are unique per each 534 columns, I am not sure how to remove them
I expect this at the end of the day:
2017_01_25_blue | 2017_08_23_green |
---|---|
Cell 1 | Cell 2 |
Cell 3 | Cell 4 |
The later
"B2_December31", B3_August23 |
---|
Cell 1 |
Cell 3 |
I also tried this :
substr(colnames(Bands_Reflectance_2017[2:335]),2,11)
What is the best way to do it? I am fairly new to programming and to r.
Thanks for sharing your code and data. Most people won't download random files. In the future you can share data with
dput(data)
or a smaller version withdput(head(data))
.You're going to have to expand/replace the
fake_bands
data frame I made here with a data frame that has two columns. One column should have the labels, like "red_edge_3", and the other should have the appropriate band number.