diff --git a/testdriver.R b/testdriver.R new file mode 100644 index 0000000000000000000000000000000000000000..570933f03ebb6c67cc1301915c4108121f631edd --- /dev/null +++ b/testdriver.R @@ -0,0 +1,59 @@ +library(MonetDB.R) +library(RSQLite) +library(RPostgreSQL) + +con1 <- dbConnect(MonetDB.R(), "hmda") +con2 <- dbConnect(MonetDBLite()) +con3 <- dbConnect(SQLite(), ":memory:") +con4 <- dbConnect(SQLite(), tempfile()) + +# PostgreSQL can't run queries +# con5 <- dbConnect(dbDriver("PostgreSQL"), host="localhost", port=5432) + +ds <- c("1"=60000, "2"=600000, "3"=6000000, "4"=60000000, "f"=128818169) +tb <- sprintf("hmda_%02d", 6:11) + +mtime <- function(x) system.time(x)[[3]] + +results <- data.frame(sys=character(), dataset=character(), nrows=integer(), ltime=numeric(), qtime=numeric(), dtime=numeric()) + +run <- function(con, sys) { + for (d in names(ds)) { + # create tables + lapply(tb, function(t) { + if (dbExistsTable(con, t)) dbRemoveTable(con, t) + dbSendQuery(con, paste0('CREATE TABLE ', t, '( "asofyear" DOUBLE PRECISION, "respondentid" VARCHAR(255), "agencycode" VARCHAR(255), "loantype" DOUBLE PRECISION, "propertytype" VARCHAR(255), "loanpurpose" DOUBLE PRECISION, "occupancy" DOUBLE PRECISION, "loanamount" DOUBLE PRECISION, "preapproval" VARCHAR(255), "actiontype" DOUBLE PRECISION, "msa_md" VARCHAR(255), "statecode" VARCHAR(255), "countycode" VARCHAR(255), "censustractnumber" VARCHAR(255), "applicantethnicity" VARCHAR(255), "coapplicantethnicity" VARCHAR(255), "applicantrace1" VARCHAR(255), "applicantrace2" VARCHAR(255), "applicantrace3" VARCHAR(255), "applicantrace4" VARCHAR(255), "applicantrace5" VARCHAR(255), "coapplicantrace1" VARCHAR(255), "coapplicantrace2" VARCHAR(255), "coapplicantrace3" VARCHAR(255), "coapplicantrace4" VARCHAR(255), "coapplicantrace5" VARCHAR(255), "applicantsex" DOUBLE PRECISION, "coapplicantsex" DOUBLE PRECISION, "applicantincome" VARCHAR(255), "purchasertype" VARCHAR(255), "denialreason1" VARCHAR(255), "denialreason2" VARCHAR(255), "denialreason3" VARCHAR(255), "ratespread" VARCHAR(255), "hoepastatus" VARCHAR(255), "lienstatus" VARCHAR(255), "editstatus" VARCHAR(255), "applicationdateindicator" DOUBLE PRECISION, "sequencenumber" DOUBLE PRECISION, "population" DOUBLE PRECISION, "minoritypopulationpct" DOUBLE PRECISION, "hudmedianfamilyincome" DOUBLE PRECISION, "tracttomsa_mdincomepct" DOUBLE PRECISION, "numberofowneroccupiedunits" DOUBLE PRECISION, "numberof1to4familyunits" DOUBLE PRECISION, "activityyear" DOUBLE PRECISION, "fedtaxid" VARCHAR(255), "respondentname_ts" VARCHAR(255), "respondentmailingaddress" VARCHAR(255), "respondentcity_ts" VARCHAR(255), "respondentstate_ts" VARCHAR(255), "respondentzipcode" VARCHAR(255), "parentname_ts" VARCHAR(255), "parentaddress" VARCHAR(255), "parentcity_ts" VARCHAR(255), "parentstate_ts" VARCHAR(255), "parentzipcode" VARCHAR(255), "respondentname_panel" VARCHAR(255), "respondentcity_panel" VARCHAR(255), "respondentstate_panel" VARCHAR(255), "assets_panel" DOUBLE PRECISION, "otherlendercode_panel" DOUBLE PRECISION, "regioncode_panel" DOUBLE PRECISION, "larcount" DOUBLE PRECISION, "validityerror" VARCHAR(255), "app_min_cnt" INTEGER, "co_min_cnt" INTEGER, "appwhite" INTEGER, "cowhite" INTEGER, "race" INTEGER, "ethnicity" VARCHAR(255))')) + }) + + # load data + ltime <- mtime( + for (t in tb) { + fname <- paste0(getwd(), "/data/", t, "_", d, ".csv") + if (inherits(con, "SQLiteConnection")) dbWriteTable(con, name = t, value = fname, row.names = FALSE, header = FALSE, append=T, sep="|", overwrite=F) + if (inherits(con, "MonetDBConnection")) dbSendQuery(con, paste0("COPY INTO ", t, " FROM '", fname, "'", collapse="\n")) + if (inherits(con, "PostgreSQLConnection")) dbGetQuery(con, paste0("COPY ", t, " FROM '", fname, "' WITH (FORMAT CSV, DELIMITER \'|\', NULL \'null\')", collapse="\n")) + }) + + trows <- dbGetQuery(con, paste0("SELECT SUM(rws) AS rws FROM (", paste0("SELECT COUNT(*) AS rws FROM ", tb, collapse=" UNION ALL "), ") AS t"))$rws[[1]] + + # make sure we have loaded as many rows as expected + stopifnot(trows == ds[d]) + + dump <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n")) + dump <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n")) + qtime <- mtime(dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))) + dump <- dbReadTable(con, "hmda_11") + dump <- dbReadTable(con, "hmda_11") + dtime <- mtime(dbReadTable(con, "hmda_11")) + + # clean up + lapply(tb, function(x) dbRemoveTable(con, x)) + + results <<- rbind(results, data.frame(sys=sys, dataset=d, nrows=ds[d], ltime=ltime, qtime=qtime, dtime=dtime)) + print(results) + write.table(results, "results.csv", row.names=F, quote=F, sep="\t", col.names=F) + } +} +run(con2, 'MonetDBLite') +run(con4, 'SQLite-file') +run(con1, 'MonetDB')