library(MonetDB.R) library(RSQLite) library(RPostgreSQL) con1 <- dbConnect(MonetDB.R(), "hmda") con2 <- dbConnect(MonetDBLite()) con3 <- dbConnect(SQLite(), ":memory:") con4 <- dbConnect(SQLite(), tempfile()) # PostgreSQL can't run queries # con5 <- dbConnect(dbDriver("PostgreSQL"), host="localhost", port=5432) ds <- c("1"=60000, "2"=600000, "3"=6000000, "4"=60000000, "f"=128818169) tb <- sprintf("hmda_%02d", 6:11) mtime <- function(x) system.time(x)[[3]] results <- data.frame(sys=character(), dataset=character(), nrows=integer(), ltime=numeric(), qtime=numeric(), dtime=numeric()) run <- function(con, sys) { for (d in names(ds)) { # create tables lapply(tb, function(t) { if (dbExistsTable(con, t)) dbRemoveTable(con, t) dbSendQuery(con, paste0('CREATE TABLE ', t, '( "asofyear" DOUBLE PRECISION, "respondentid" VARCHAR(255), "agencycode" VARCHAR(255), "loantype" DOUBLE PRECISION, "propertytype" VARCHAR(255), "loanpurpose" DOUBLE PRECISION, "occupancy" DOUBLE PRECISION, "loanamount" DOUBLE PRECISION, "preapproval" VARCHAR(255), "actiontype" DOUBLE PRECISION, "msa_md" VARCHAR(255), "statecode" VARCHAR(255), "countycode" VARCHAR(255), "censustractnumber" VARCHAR(255), "applicantethnicity" VARCHAR(255), "coapplicantethnicity" VARCHAR(255), "applicantrace1" VARCHAR(255), "applicantrace2" VARCHAR(255), "applicantrace3" VARCHAR(255), "applicantrace4" VARCHAR(255), "applicantrace5" VARCHAR(255), "coapplicantrace1" VARCHAR(255), "coapplicantrace2" VARCHAR(255), "coapplicantrace3" VARCHAR(255), "coapplicantrace4" VARCHAR(255), "coapplicantrace5" VARCHAR(255), "applicantsex" DOUBLE PRECISION, "coapplicantsex" DOUBLE PRECISION, "applicantincome" VARCHAR(255), "purchasertype" VARCHAR(255), "denialreason1" VARCHAR(255), "denialreason2" VARCHAR(255), "denialreason3" VARCHAR(255), "ratespread" VARCHAR(255), "hoepastatus" VARCHAR(255), "lienstatus" VARCHAR(255), "editstatus" VARCHAR(255), "applicationdateindicator" DOUBLE PRECISION, "sequencenumber" DOUBLE PRECISION, "population" DOUBLE PRECISION, "minoritypopulationpct" DOUBLE PRECISION, "hudmedianfamilyincome" DOUBLE PRECISION, "tracttomsa_mdincomepct" DOUBLE PRECISION, "numberofowneroccupiedunits" DOUBLE PRECISION, "numberof1to4familyunits" DOUBLE PRECISION, "activityyear" DOUBLE PRECISION, "fedtaxid" VARCHAR(255), "respondentname_ts" VARCHAR(255), "respondentmailingaddress" VARCHAR(255), "respondentcity_ts" VARCHAR(255), "respondentstate_ts" VARCHAR(255), "respondentzipcode" VARCHAR(255), "parentname_ts" VARCHAR(255), "parentaddress" VARCHAR(255), "parentcity_ts" VARCHAR(255), "parentstate_ts" VARCHAR(255), "parentzipcode" VARCHAR(255), "respondentname_panel" VARCHAR(255), "respondentcity_panel" VARCHAR(255), "respondentstate_panel" VARCHAR(255), "assets_panel" DOUBLE PRECISION, "otherlendercode_panel" DOUBLE PRECISION, "regioncode_panel" DOUBLE PRECISION, "larcount" DOUBLE PRECISION, "validityerror" VARCHAR(255), "app_min_cnt" INTEGER, "co_min_cnt" INTEGER, "appwhite" INTEGER, "cowhite" INTEGER, "race" INTEGER, "ethnicity" VARCHAR(255))')) }) # load data ltime <- mtime( for (t in tb) { fname <- paste0(getwd(), "/data/", t, "_", d, ".csv") if (inherits(con, "SQLiteConnection")) dbWriteTable(con, name = t, value = fname, row.names = FALSE, header = FALSE, append=T, sep="|", overwrite=F) if (inherits(con, "MonetDBConnection")) dbSendQuery(con, paste0("COPY INTO ", t, " FROM '", fname, "'", collapse="\n")) if (inherits(con, "PostgreSQLConnection")) dbGetQuery(con, paste0("COPY ", t, " FROM '", fname, "' WITH (FORMAT CSV, DELIMITER \'|\', NULL \'null\')", collapse="\n")) }) trows <- dbGetQuery(con, paste0("SELECT SUM(rws) AS rws FROM (", paste0("SELECT COUNT(*) AS rws FROM ", tb, collapse=" UNION ALL "), ") AS t"))$rws[[1]] # make sure we have loaded as many rows as expected stopifnot(trows == ds[d]) dump <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n")) dump <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n")) qtime <- mtime(dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))) dump <- dbReadTable(con, "hmda_11") dump <- dbReadTable(con, "hmda_11") dtime <- mtime(dbReadTable(con, "hmda_11")) # clean up lapply(tb, function(x) dbRemoveTable(con, x)) results <<- rbind(results, data.frame(sys=sys, dataset=d, nrows=ds[d], ltime=ltime, qtime=qtime, dtime=dtime)) print(results) write.table(results, "results.csv", row.names=F, quote=F, sep="\t", col.names=F) } } run(con2, 'MonetDBLite') run(con4, 'SQLite-file') run(con1, 'MonetDB')