I am trying to filter the rows contained in the data
column of data_split_
based off information in other columns of data_split_
and my_perts_df_
.
So specifically, I want to:
- filter the
my_perts_df_
table by excluding anypert_iname
inexclude
- then filter the same table according to
grouping_var
, which can be eitherpert_class
orpert_iname
- Finally, use this filtered table to filter the
data
column in thedata_split_
table.
These are the two tibbles:
my_perts_df_ <- structure(list(pert_iname = c("DMSO", "gsk126", "jq1-s", "unc-0646",
"geldanamycin", "decitabine", "gsk-j4", "vorinostat", "ly-294002",
"staurosporine", "ly-294002", "kn-62", "ruxolitinib", "losmapimod",
"ar a014418", "tofacitinib", "sp600125"), pert_class = c("control",
"Epigenetic", "Epigenetic", "Epigenetic", "Epigenetic", "Epigenetic",
"Epigenetic", "Epigenetic", "Epigenetic", "Kinase inhibitor",
"Kinase inhibitor", "Kinase inhibitor", "Kinase inhibitor", "Kinase inhibitor",
"Kinase inhibitor", "Kinase inhibitor", "Kinase inhibitor"),
P100 = c("P100", "P100", "P100", "P100", "P100", "P100",
"P100", "P100", "P100", "P100", "P100", "P100", "P100", "P100",
"P100", "P100", "P100"), GCP = c("GCP", "GCP", "GCP", "GCP",
"GCP", "GCP", "GCP", "GCP", "GCP", "GCP", "GCP", "GCP", NA,
"GCP", "GCP", "GCP", "GCP"), keep = c(TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE)), row.names = c(NA, -17L), class = c("tbl_df",
"tbl", "data.frame"))
data_split_ <- structure(list(dataset_type = c("P100", "P100", "GCP", "GCP"),
grouping_var = c("pert_class", "pert_iname", "pert_class",
"pert_iname"), filter_vars = list("Kinase inhibitor", "DMSO",
"Epigenetic", "DMSO"), exclude = list(NA_character_,
NA_character_, NA_character_, NA_character_), output_dir = c("output_final/p100",
"output_final/p100", "output_final/gcp", "output_final/gcp"
), data = list(structure(list(replicate_id = c("NPC--cabozantinib--VEGFR inhibitor::E01_acq_01::P-0063",
"PC3--DMSO--control::A02_acq_01::P-0034", "PC3--bms-345541--Kinase inhibitor::E12_acq_01::P-0034",
"MCF7--unc1215--Epigenetic::G09_DIA_acq_01::P-0015", "PC3--dasatinib--CML TKI::B05_acq_01::P-0061",
NA, "HUVEC--okadaic acid--Other::F11_acq_01::P-0057", "HUVEC--decitabine--Epigenetic::A05_acq_01::P-0038",
"PC3--ruxolitinib--Kinase inhibitor::E02_acq_01::P-0024",
"NPC--pd-0332991--Kinase inhibitor::B05_acq_02::P-0027",
"A549--vorinostat--Epigenetic::H05_acq_02::P-0058", "YAPC--dasatinib--CML TKI::B06_acq_01::P-0062",
"NPC--vorinostat--Epigenetic::H05_DIA_acq_01::P-0020", "A375--tbb--Kinase inhibitor::B10_acq_01::P-0022",
"NPC--nilotinib--CML TKI::B11_acq_03::P-0027", "A549--vandetanib--VEGFR inhibitor::E10_acq_01::P-0058",
"HUVEC--eplerenone--CV::D02_acq_01::P-0038", NA, "PC3--afuresertib--Kinase inhibitor::G08_acq_01::P-0034",
"NPC--ponatinib--CML TKI::A12_acq_01::P-0063", "PC3--sotrastaurin--IMiD::F10_acq_01::P-0024",
"HUVEC--atorvastatin--CV::A05_acq_01::P-0057", "HUVEC--losmapimod--Kinase inhibitor::C11_acq_01::P-0038",
"HUVEC--aspirin--CV::CS20180418_P100_HUVEC3_Batch2_P-0069_B11_acq_01::P-0069",
"A375--belinostat--Epigenetic::D08_DIA_acq_01::P-0017"),
pert_iname = c("cabozantinib", "DMSO", "bms-345541",
"unc1215", "dasatinib", "gefitinib", "okadaic acid",
"decitabine", "ruxolitinib", "pd-0332991", "vorinostat",
"dasatinib", "vorinostat", "tbb", "nilotinib", "vandetanib",
"eplerenone", "isoproterenol", "afuresertib", "ponatinib",
"sotrastaurin", "atorvastatin", "losmapimod", "aspirin",
"belinostat"), pert_class = c("VEGFR inhibitor", "control",
"Kinase inhibitor", "Epigenetic", "CML TKI", NA, "Other",
"Epigenetic", "Kinase inhibitor", "Kinase inhibitor",
"Epigenetic", "CML TKI", "Epigenetic", "Kinase inhibitor",
"CML TKI", "VEGFR inhibitor", "CV", NA, "Kinase inhibitor",
"CML TKI", "IMiD", "CV", "Kinase inhibitor", "CV", "Epigenetic"
), pr_gene_symbol = c("pS142 DPF2", "pS222 RBM17", "pS515 ZC3H14",
"pS235 RPS6", "pS405 C13orf8", "pS403 RNF169", "pS1035 GPATCH8",
"pS1075 AP1GBP1", "pS12 EIF4A3", "pS56 NOC2L", "pS230 SH3KBP1",
"pS2218 MAP4", "pS275 NANS", "pS405 C13orf8", "pS75 DDX54",
"pS446 BRAF", "pS500 C17orf85", "pS692 FAM129B", "pT596 MARK2",
"pS163 MAP3K2", "pS386 PFKP", "pS75 DDX54", "pS652 NUFIP2",
"pS275 NANS", "pS207 FASN"), value = c(0.181722044944763,
0.6737335, 0.021798491, 0.777894914150238, 0.0691990852355957,
0.2050631, 0.19466971, -0.30280375, -0.0263868570327759,
-0.196122169494629, 0.0888378620147705, -0.07376492,
0.05050297, 0.309347808361053, -0.146979808807373, -0.0501706600189209,
0.20668465, -0.279913723468781, 0.15067881, -0.0937120914459229,
-0.129467606544495, -0.07893503, -0.09858775, 0.312014818191528,
-0.0723450779914856)), row.names = c(NA, -25L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(replicate_id = c("MCF7--decitabine--Epigenetic::F02_DIA_acq_01::P-0015",
"PC3--kn-93--Kinase inhibitor::E07_acq_01::P-0024", "NPC--verapamil--CV::E06_acq_02::P-0063",
"A549--ar a014418--Kinase inhibitor::D12_acq_01::P-0033",
"A375--gsk525762a--Epigenetic::C01_DIA_acq_01::P-0017", "YAPC--gsk126--Epigenetic::A05_acq_01::P-0030",
"YAPC--trametinib--Kinase inhibitor::D10_acq_01::P-0062",
"NPC--pri-724--Other::E07_acq_03::P-0027", "PC3--gossypetin--Kinase inhibitor::F09_acq_01::P-0024",
"A549--jq1-s--Epigenetic::D03_DIA_acq_01::P-0019", NA, "HUVEC--calyculin a--Kinase inhibitor::D08_acq_01::P-0057",
"MCF7--imatinib--CML TKI::C08_acq_01::P-0060", "NPC--byl719--Kinase inhibitor::H07_acq_03::P-0027",
"MCF7--c646--Epigenetic::C07_acq_02::P-0023", "NPC--tbb--Kinase inhibitor::B10_DIA_acq_01::P-0020",
"YAPC--ms-275--Epigenetic::A07_acq_01::P-0030", "MCF7--ms-275--Epigenetic::A08_DIA_acq_01::P-0015",
"HUVEC--curcumin--Other::D08_acq_01::P-0038", "MCF7--etoposide--Other::B02_acq_01::P-0023",
"MCF7--unc-0321--Epigenetic::E06_DIA_acq_01::P-0015", "NPC--ex527--Epigenetic::C20150526_P-0016_NPC_T1_cmpds_P-0016_C11_DIA_acq_01::P-0016",
"A549--sch 900776--Kinase inhibitor::B08_acq_01::P-0033",
"A549--gsk126--Epigenetic::A06_DIA_acq_01::P-0019", NA),
pert_iname = c("decitabine", "kn-93", "verapamil", "ar a014418",
"gsk525762a", "gsk126", "trametinib", "pri-724", "gossypetin",
"jq1-s", "cyclosporine", "calyculin a", "imatinib", "byl719",
"c646", "tbb", "ms-275", "ms-275", "curcumin", "etoposide",
"unc-0321", "ex527", "sch 900776", "gsk126", "ceritinib"
), pert_class = c("Epigenetic", "Kinase inhibitor", "CV",
"Kinase inhibitor", "Epigenetic", "Epigenetic", "Kinase inhibitor",
"Other", "Kinase inhibitor", "Epigenetic", NA, "Kinase inhibitor",
"CML TKI", "Kinase inhibitor", "Epigenetic", "Kinase inhibitor",
"Epigenetic", "Epigenetic", "Other", "Other", "Epigenetic",
"Epigenetic", "Kinase inhibitor", "Epigenetic", NA),
pr_gene_symbol = c("pS207 FASN", "pS230 SH3KBP1", "pS465 WDR20",
"pS652 NUFIP2", "pS601 LARP5", "pT3893 PLEC1_1", "pY321 DYRK1A",
"pS163 MAP3K2", "pS2 PAK2", "pS200 FOSL2_1", "pS402 SRRM1",
"pS2 TMSB4X", "pS1012 NUP214", "pS1075 AP1GBP1", "pS1219 BAT2",
"pS353 SRRM2", "pS142 DPF2", "pT2675 BAT2D1", "pS1035 GPATCH8",
"pS275 NANS", "pS556 ULK1", "pS103 DHX16", "pS515 ZC3H14",
"pS1075 AP1GBP1", "pS207 FASN"), value = c(0.0880586504936218,
-0.1592857837677, -0.232870101928711, 0.145888328552246,
-0.233265995979309, 0.659018278121948, 0.03665614, -0.080927848815918,
0.115837097167969, 0.234739303588867, 0.192527055740356,
0, 0.315743684768677, -0.426863610744476, -0.0367576479911804,
-0.16833115, 0.579586148262024, 0.263529172516428, -0.7783067,
-0.214210987091064, 0.105207920074463, 0.728229999542236,
-0.03392493724823, -0.106773972511292, -0.0157837867736816
)), row.names = c(NA, -25L), class = c("tbl_df", "tbl",
"data.frame")), structure(list(replicate_id = c(NA, "A375--tretinoin--Other::A06_acq_01::G-0022",
"HAoSMC--dexamethasone--Other::A08_acq_01::G-0039a", "YAPC--epz-5687--Epigenetic::T20151105_Plate30_YAPC_T1_G-0030_G09_acq_01::G-0030",
"NPC--tbb--Kinase inhibitor::B12_acq_01::G-0020", "PC3--vandetanib--VEGFR inhibitor::E12_acq_02::G-0061",
"A549--DMSO--control::A01_acq_01::G-0033", "YAPC--flavopiridol--Kinase inhibitor::A04_acq_01::G-0032",
"HUVEC--tretinoin--Other::B09_acq_01::G-0038", "A375--sotrastaurin--IMiD::T_F12_acq_02::G-0022",
"A375--afuresertib--Kinase inhibitor::G07_acq_01::G-0028",
"MCF7--vx-970--Kinase inhibitor::C09_acq_02::G-0029", "A375--axitinib--VEGFR inhibitor::C01_acq_01::G-0059",
"NPC--trichostatin a--Epigenetic::C07_acq_02::G-0016R", "MCF7--nilotinib--CML TKI::B10_acq_01::G-0029",
"A549--DMSO--control::A01_acq_01::G-0058", "NPC--vx-970--Kinase inhibitor::C07_acq_01::G-0027",
"A375--tacrolimus--Other::H01_acq_01::G-0022", "A375--epz004777--Epigenetic::H01_acq_01::G-0017",
"A375--axitinib--VEGFR inhibitor::C01_acq_01::G-0059", "NPC--axitinib--VEGFR inhibitor::C01_acq_01::G-0063",
"HAoSMC--gsk126--Epigenetic::B05_acq_01::G-0039a", "NPC--ly-294002--Epigenetic::H12_acq_01::G-0016R",
"PC3--vemurafenib--Kinase inhibitor::E03_acq_01::G-0034",
"A375--lenalidomide--IMiD::C12_acq_01::G-0028"), pert_iname = c("olmesartan",
"tretinoin", "dexamethasone", "epz-5687", "tbb", "vandetanib",
"DMSO", "flavopiridol", "tretinoin", "sotrastaurin", "afuresertib",
"vx-970", "axitinib", "trichostatin a", "nilotinib", "DMSO",
"vx-970", "tacrolimus", "epz004777", "axitinib", "axitinib",
"gsk126", "ly-294002", "vemurafenib", "lenalidomide"), pert_class = c(NA,
"Other", "Other", "Epigenetic", "Kinase inhibitor", "VEGFR inhibitor",
"control", "Kinase inhibitor", "Other", "IMiD", "Kinase inhibitor",
"Kinase inhibitor", "VEGFR inhibitor", "Epigenetic", "CML TKI",
"control", "Kinase inhibitor", "Other", "Epigenetic", "VEGFR inhibitor",
"VEGFR inhibitor", "Epigenetic", "Epigenetic", "Kinase inhibitor",
"IMiD"), pr_gene_symbol = c("H3K9me2S10ph1K14ac0", "H3K27me1K36me3",
"H3K27me2K36me2", "H3K9me2S10ph1K14ac0", "H3K27me3K36me1",
"H4(4to17)K5ac1K8ac1K12ac1K16ac1me0", "H3K27me3K36me2", "H3K27me2K36me1",
"H3K9ac1S10ph1K14ac1", "H3K9me1K14ac1", "H3K27me2K36me2",
"H3K9me1S10ph1K14ac0", "H3K9me1K14ac0", "H3K9me1S10ph1K14ac1",
"H3K27me2K36me2", "H3K27me2K36me1", "H3K27me3K36me0", "H3K27ac1K36me0",
"H3K9me3S10ph1K14ac0", "H3K9me2S10ph1K14ac1", "H4(20to23)K20me3",
"H3NORM(41-49)", "H3K18ac0K23ac1", "H3K27ac1K36me0", "H3K27me3K36me2"
), value = c(0.0708658695220947, -0.27732902765274, 0.335925102233887,
0.148061037063599, 0.0192266702651978, -0.06201318, -0.32350385,
-0.113838315010071, 0.888034820556641, 0.0549858212471008,
-0.474118649959564, -0.0354797840118408, -0.1448524, -1.6616924,
-0.0739709138870239, -0.191317439079285, 0.404392004013062,
-1.12328338623047, -0.106091976, -0.45647645, 0.166058421134949,
-0.0442338585853577, -0.59617877, -0.9624169, 0.0658320188522339
)), row.names = c(NA, -25L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(replicate_id = c("NPC--pd0325901--Kinase inhibitor::C02_acq_01::G-0027",
"NPC--resveratrol--Other::B10_acq_02::G-0016R", "HAoSMC--sirolimus--Other::F04_acq_01::G-0039a",
"PC3--rgfp966--Epigenetic::E12_acq_01::G-0024", "MCF7--rolipram--Other::D10_acq_01::G-0023",
"PC3--sirolimus--Other::F11_acq_01::G-0018", "A375--sorafenib--VEGFR inhibitor::C10_acq_01::G-0059",
"PC3--tbb--Kinase inhibitor::B10_acq_01::G-0024", NA, "MCF7--staurosporine--Kinase inhibitor::G11_acq_01::G-0029",
"A375--pravastatin--CV::H02_acq_01::G-0028", "PC3--selumetinib--Kinase inhibitor::B01_acq_01::G-0034",
NA, "A549--sp600125--Kinase inhibitor::G04_acq_01::G-0033",
"A549--salermide--Epigenetic::D11_acq_01::G-0019", "MCF7--roscovitine--Kinase inhibitor::C11_acq_01::G-0023",
"YAPC--everolimus--Kinase inhibitor::D02_acq_01::G-0032",
"YAPC--verteporfin--Other::F06_acq_02::G-0032", "PC3--osi-027--Kinase inhibitor::A12_acq_02::G-0018",
"A549--dexamethasone--Other::D04_acq_01::G-0025", "HAoSMC--jq1-s--Epigenetic::G03_acq_01::G-0039a",
"HUVEC--tofacitinib--Kinase inhibitor::H08_acq_01::G-0038",
"MCF7--methylstat--Epigenetic::E09_acq_02::G-0015", NA, "PC3--DMSO--control::A03_acq_01::G-0018"
), pert_iname = c("pd0325901", "resveratrol", "sirolimus",
"rgfp966", "rolipram", "sirolimus", "sorafenib", "tbb", "lapatinib",
"staurosporine", "pravastatin", "selumetinib", "ceritinib",
"sp600125", "salermide", "roscovitine", "everolimus", "verteporfin",
"osi-027", "dexamethasone", "jq1-s", "tofacitinib", "methylstat",
"lapatinib", "DMSO"), pert_class = c("Kinase inhibitor",
"Other", "Other", "Epigenetic", "Other", "Other", "VEGFR inhibitor",
"Kinase inhibitor", NA, "Kinase inhibitor", "CV", "Kinase inhibitor",
NA, "Kinase inhibitor", "Epigenetic", "Kinase inhibitor",
"Kinase inhibitor", "Other", "Kinase inhibitor", "Other",
"Epigenetic", "Kinase inhibitor", "Epigenetic", NA, "control"
), pr_gene_symbol = c("H3K9me0S10ph1K14ac0", "H3K27me2K36me1",
"H3K9me2K14ac0", "H3NORM(41-49)", "H3K4me2", "H3K27me0K36me0",
"H3K9me1K14ac1", "H3K9ac1K14ac1", "H4(4to17)K5ac1me0", "H3K18ac0K23ac0",
"H3K4me2", "H3K4me1", "H3K27me3K36me0", "H3K27me2K36me2",
"H3K9me2K14ac0", "H3K27me1K36me0", "H3K27me3K36me2", "H3K27ac1K36me0",
"H3K18ac1K23ac1", "H3K79me0", "H3K27me3K36me1", "H3K27me3K36me2",
"H3K27me3K36me0", "H4(4to17)K8ac1K12ac1me0", "H3K4me1"),
value = c(-0.601195812225342, -0.056107312, 0.131532583385706,
-0.121334552764893, -0.216784358024597, -0.261847794055939,
-0.218853, -0.189859390258789, 0.0388565063476562, -0.322605133056641,
-0.0750547647476196, 0, -0.29474682, 0.3334179, -0.381944835186005,
-0.677833437919617, 0.150129675865173, 0.153744220733643,
1.26011848449707, 0.225029110908508, 0.124394655227661,
0.0346214175224304, 0.271708130836487, 0.57789421081543,
-0.622473895549774)), row.names = c(NA, -25L), class = c("tbl_df",
"tbl", "data.frame")))), row.names = c(NA, -4L), class = c("tbl_df",
"tbl", "data.frame"))
Here's my code so far:
data_split_ %>%
mutate(perturbation_filtered_data = pmap(.l = list(grouping_var, filter_vars, exclude, data), .f = function(g_var, f_var, e_var, d){
if (any(is.na(unlist(e_var)))) {
exclude_term <- ""
} else {
exclude_term <- unlist(e_var)
}
new_pert_df <- my_perts_df_ %>%
filter(!(pert_iname %in% exclude_term)) %>%
filter(sym(g_var) == f_var)
res <- d %>%
filter(pert_iname %in% new_pert_df$pert_iname)
return(res)
}))
# A tibble: 4 x 7
dataset_type grouping_var filter_vars exclude output_dir data perturbation_filtered_data
<chr> <chr> <list> <list> <chr> <list> <list>
1 P100 pert_class <chr [1]> <chr [1]> output_final/p100 <tibble [25 x 5]> <tibble [0 x 5]>
2 P100 pert_iname <chr [1]> <chr [1]> output_final/p100 <tibble [25 x 5]> <tibble [0 x 5]>
3 GCP pert_class <chr [1]> <chr [1]> output_final/gcp <tibble [25 x 5]> <tibble [0 x 5]>
4 GCP pert_iname <chr [1]> <chr [1]> output_final/gcp <tibble [25 x 5]> <tibble [0 x 5]>
The filtered datasets all come back with 0 rows, which is wrong. I'm expecting a couple retained rows in each of the datasets. Any help is much appreciated!