I have been comparing the common elements in multiple datasets and have been having great success with a small number of sets. However, I want to compare a larger number of datasets and I am trying to automate the process of writing the code since the number of permutations possible is in the thousands. Here is what I have so far that has been working:
Intersect <- function (x) {
if (length(x) == 1) {
unlist(x)
} else if (length(x) == 2) {
intersect(x[[1]], x[[2]])
} else if (length(x) > 2){
intersect(x[[1]], Intersect(x[-1]))
}
}
Union <- function (x) {
if (length(x) == 1) {
unlist(x)
} else if (length(x) == 2) {
union(x[[1]], x[[2]])
} else if (length(x) > 2) {
union(x[[1]], Union(x[-1]))
}
}
Setdiff <- function (x, y) {
xx <- Intersect(x)
yy <- Union(y)
setdiff(xx, yy)
}
set.seed(1)
xx.1 <- list(A = set2,
B = set3,
C = set4,
D = set5)
abcd = Intersect(xx.1)
ab = Setdiff(xx.1[c("A", "B")], xx.1[c("C", "D")])
ac = Setdiff(xx.1[c("A", "C")], xx.1[c("B", "D")])
ad = Setdiff(xx.1[c("A", "D")], xx.1[c("B", "C")])
bc = Setdiff(xx.1[c("B", "C")], xx.1[c("A", "D")])
bd = Setdiff(xx.1[c("B", "D")], xx.1[c("A", "C")])
cd = Setdiff(xx.1[c("C", "D")], xx.1[c("A", "B")])
abc = Setdiff(xx.1[c("A", "B", "C")], xx.1[c("D")])
abd = Setdiff(xx.1[c("A", "B", "D")], xx.1[c("C")])
bcd = Setdiff(xx.1[c("C", "B", "D")], xx.1[c("A")])
acd = Setdiff(xx.1[c("C", "B", "A")], xx.1[c("D")])
a = Setdiff(xx.1[c("A")], xx.1[c("E","B", "C", "D")])
b = Setdiff(xx.1[c("B")], xx.1[c("E","A", "C", "D")])
c = Setdiff(xx.1[c("C")], xx.1[c("E","B", "A", "D")])
d = Setdiff(xx.1[c("D")], xx.1[c("E","B", "C", "A")])
e = Setdiff(xx.1[c("E")], xx.1[c("A","B", "C", "D")])
write.table(qpcR:::cbind.na(abcd,ab,ac,ad,bc,bd,cd,abc,abd,bcd,acd,a,b,c,d),
file="output.csv", sep=",", quote=FALSE, row.names=FALSE, col.names=TRUE)
What I want to do is automate the process of writing the "setdiff" statements so I can compare more than 4 datasets but I am having some trouble. Here is what I have so far:
Intersect <- function (x) {
# Multiple set version of intersect
# x is a list
if (length(x) == 1) {
unlist(x)
} else if (length(x) == 2) {
intersect(x[[1]], x[[2]])
} else if (length(x) > 2){
intersect(x[[1]], Intersect(x[-1]))
}
}
Union <- function (x) {
# Multiple set version of union
# x is a list
if (length(x) == 1) {
unlist(x)
} else if (length(x) == 2) {
union(x[[1]], x[[2]])
} else if (length(x) > 2) {
union(x[[1]], Union(x[-1]))
}
}
Setdiff <- function (x, y) {
# Remove the union of the y's from the common x's.
# x and y are lists of characters.
xx <- Intersect(x)
yy <- Union(y)
setdiff(xx, yy)
}
allCombs <- function(x) c(x, lapply(seq_along(x)[-1L],
function(y) combn(x, y, paste0, collapse = "")),
recursive = TRUE)
list1 = list("b","d","e","g","h","j","k","l","n","o","q","s","t","u","v","z")
permutations = allCombs(list1)
stringleft =" = Setdiff(xx.1[c("
stringleft2 = ")], xx.1[c("
stringright = ")])"
for (x in permutations) {
left = strsplit(x,split = "" )
right = Setdiff (c(left),c(list1))
pr = paste(x, stringleft, left, stringleft2, right, stringright)
print(pr)
}
the permutations are just fine, but I seem to be having trouble with the Setdiff not returning any values for the list "right" It should look something like this:
bdeghj = Setdiff(xx.1[c("b","d","e", "g", "h","j")], xx.1[c("k","l","n","o","q","s","t","u","v","z")])
but I keep on getting this:
[1] "bdhjklnuz = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"n\", \"u\", \"z\") )], xx.1[c( )])"
[1] "bdhjklnvz = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"n\", \"v\", \"z\") )], xx.1[c( )])"
[1] "bdhjkloqs = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"o\", \"q\", \"s\") )], xx.1[c( )])"
[1] "bdhjkloqt = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"o\", \"q\", \"t\") )], xx.1[c( )])"
[1] "bdhjkloqu = Setdiff(xx.1[c( c(\"b\", \"d\", \"h\", \"j\", \"k\", \"l\", \"o\", \"q\", \"u\") )], xx.1[c( )])"