I have a large CSV file. I currently read it 10,000 lines at a time and send each chunk to a MySQL database, which takes a long time, so I would like to test whether parallelizing the inserts speeds it up. I implemented the code below, but nothing happens and there are no errors. (A sketch of the sequential loop I am trying to replace is at the end of the post.)
library(parallel)
library(doParallel)
library(RMySQL)
con <- dbConnect(MySQL(), host = host, user = user, password = password, dbname = database)
path <- "file.csv"
col_names <- colnames(read.csv(path, header = TRUE, nrows = 1))
batch_size <- 10000
offsets <- seq(0, file.info(path)$size / 2, by = batch_size) # length(offsets) = 335363
cl <- makeCluster(detectCores())
registerDoParallel(cl)
foreach(i = seq_along(offsets), .packages = c("DBI", "RMySQL")) %dopar% {
  header <- (i == 1)  # only the first chunk still carries the header row
  setb <- read.csv(path, skip = offsets[i], nrows = batch_size,
                   header = header)
  colnames(setb) <- col_names
  dbWriteTable(con, "table1", setb, append = TRUE)
}
stopCluster(cl)
dbDisconnect(con)
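For comparison, here is roughly the sequential version I am trying to speed up. This is a minimal sketch of what I described above, not my exact script: it assumes the same host, user, password, database, and table1, and it wraps read.csv in tryCatch only because read.csv raises an error once skip runs past the end of the file.

# Sequential baseline (sketch): read batch_size lines per pass and append them to MySQL.
library(RMySQL)

con <- dbConnect(MySQL(), host = host, user = user,
                 password = password, dbname = database)

path       <- "file.csv"
batch_size <- 10000
col_names  <- colnames(read.csv(path, header = TRUE, nrows = 1))

lines_done <- 0
repeat {
  # skip the header row plus every line already loaded
  chunk <- tryCatch(
    read.csv(path, skip = lines_done + 1, nrows = batch_size,
             header = FALSE, col.names = col_names),
    error = function(e) NULL  # read.csv errors once skip passes the end of the file
  )
  if (is.null(chunk) || nrow(chunk) == 0) break
  dbWriteTable(con, "table1", chunk, append = TRUE, row.names = FALSE)
  lines_done <- lines_done + nrow(chunk)
}

dbDisconnect(con)

Note that this loop counts lines already read rather than byte positions, and it uses the connection in the same process that created it. It works, but it is slow, which is why I want the parallel version above.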