Add new column to MonetDBLite table

662 Views Asked by At

I am trying to use R + MonetDB as a large-data analysis stack and am having trouble creating a new column and populating it with data from my analysis. Here is a toy example:

library(MonetDBLite)
library(DBI)

data(mtcars)
db <- dbConnect(MonetDB.R::MonetDB(), embedded="./test.db")

# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite=TRUE)

## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")


## insert data into the new column
v1 <- mtcars["mpg"] * pi
dbSendQuery(db, "INSERT INTO mtcars (v1) VALUES (?)", bind.data=v1)

And the error message:

Error in .local(conn, statement, ...) : 
  Unable to execute statement 'INSERT INTO mtcars (v1) VALUES ('c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.23008278...'.
Server says 'ERROR: 49.6371639267187, 61.8893752757189, 47.1238898038469, 67.2300827868216)' to type dbl failed.
' [#conversion of string 'c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.2300827868216, 58.7477826221291, 56.8628270299753, 44.924774946334, 76.6548607475909, 71.6283125018473, 60.318578948924, 55.9203492338983, 51.5221195188726, 54.3495529071034, 47.7522083345649, 32.6725635973338, 32.6725635973338, 46.18141200777, 101.787601976309, 95.5044166691297, 106.499990956694, 67.5442420521806, 48.6946861306418, 47.7522083345649, 41.7831822927443, 60.318578948924, 85.7654794430014, 81.6814089933346, 95.5044166691297, 
].
In addition: Warning message:
In if (is.na(value)) statement <- sub("?", "NULL", statement, fixed = TRUE) else if (valueClass %in%  :
  the condition has length > 1 and only the first element will be used

From this error I surmise that maybe bind.data can't be used with MonetDBLite?

Question:

How can I add a column(s) to a MonetDBLite table and populate it with data from my R session?

2

There are 2 best solutions below

3
On

There seems to be an issue with parameterized SQL queries via dbBind with MonetDBLite (see https://github.com/hannesmuehleisen/MonetDBLite-R/issues/16). The code below works with SQLite:

library(RSQLite)
data(mtcars)
db <- dbConnect(SQLite(), ":memory:")
# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite = TRUE, row.names=TRUE)

## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")

## do computation with R
mtcars$v1 <- mtcars$mpg * pi
mtcars$row_names <- rownames(mtcars)

update_query <- dbSendQuery(db, 'update mtcars set "v1"=$v1 WHERE row_names=$row_names')

dbBind(update_query, mtcars[, c("v1", "row_names")])  # send the updated data
dbClearResult(update_query)  # release the prepared statement
dbReadTable(db, "mtcars")
dbDisconnect(db)

However, with MonetDBLite, it is producing an error (and a warning) for the dbBind step:

> dbBind(update_query, mtcars[, c("v1", "row_names")])  # send the updated data
Error in vapply(params, function(x) { : values must be length 1,
 but FUN(X[[1]]) result is length 32
In addition: Warning message:
In if (is.na(x)) "NULL" else if (is.numeric(x) || is.logical(x)) { :
  the condition has length > 1 and only the first element will be used

A workaround I came up with is to use the glue_data_sql in the glue package to "manually" compose the queries (without needing to loop through rows):

library(MonetDBLite)
library(DBI)

data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite = TRUE, row.names=TRUE)

dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")

library(glue)
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi
update_query <- glue_data_sql(mtcars, "update mtcars set v1 = {v1} where row_names = {row_names};", .con=db)
lapply(update_query, dbSendQuery, conn=db)

# verify
dbReadTable(db, "mtcars")
dbDisconnect(db)

Yet another workaround that does the insert in one query (closer to @Zelazny7 's original attempt):

library(MonetDBLite)
library(DBI)

data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")

dbSendQuery(db, "CREATE TABLE mtcars (
                 row_names VARCHAR(32),
                 v1 DOUBLE);")

library(glue)
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi
insert_values <- glue_data(mtcars, "('{row_names}', {v1})")
insert_values <- glue_collapse(insert_values, sep=", ", last="")
insert_query <- glue("INSERT INTO mtcars (row_names, v1) VALUES {insert_values}")
dbSendQuery(db, insert_query)

dbReadTable(db, "mtcars")
dbDisconnect(db)
2
On

First of all the "INSERT" command in your last statement is not correct. You will need the "UPDATE" statement.

That being said, I propose a solution where you can populate your MonetDBLite table directly from R:

library(MonetDBLite)
library(DBI)

data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")

# I added a rownbr to the dataset so it will be easier later
mtcars$rownbr <- 1:nrow(mtcars)

# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite=TRUE)

## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")


## insert data into the new column
v1 <- mtcars["mpg"] * pi

for (i in 1:nrow(mtcars)){
   myquery <- paste0("UPDATE mtcars SET v1 = ",v1$mpg[i], "where rownbr =",i," ;")
   dbSendQuery(db, myquery )
}