library(MonetDB.R)
library(RSQLite)
library(RPostgreSQL)
con1 <- dbConnect(MonetDB.R(), "hmda")
con2 <- dbConnect(MonetDBLite())
con3 <- dbConnect(SQLite(), ":memory:")
con4 <- dbConnect(SQLite(), tempfile())
# PostgreSQL can't run queries
# con5 <- dbConnect(dbDriver("PostgreSQL"), host="localhost", port=5432)
ds <- c("1"=60000, "2"=600000, "3"=6000000, "4"=60000000, "f"=128818169)
tb <- sprintf("hmda_%02d", 6:11)
mtime <- function(x) system.time(x)[[3]]
results <- data.frame(sys=character(), dataset=character(), nrows=integer(), ltime=numeric(), qtime=numeric(), dtime=numeric())
run <- function(con, sys) {
for (d in names(ds)) {
# create tables
lapply(tb, function(t) {
if (dbExistsTable(con, t)) dbRemoveTable(con, t)
dbSendQuery(con, paste0('CREATE TABLE ', t, '( "asofyear" DOUBLE PRECISION, "respondentid" VARCHAR(255), "agencycode" VARCHAR(255), "loantype" DOUBLE PRECISION, "propertytype" VARCHAR(255), "loanpurpose" DOUBLE PRECISION, "occupancy" DOUBLE PRECISION, "loanamount" DOUBLE PRECISION, "preapproval" VARCHAR(255), "actiontype" DOUBLE PRECISION, "msa_md" VARCHAR(255), "statecode" VARCHAR(255), "countycode" VARCHAR(255), "censustractnumber" VARCHAR(255), "applicantethnicity" VARCHAR(255), "coapplicantethnicity" VARCHAR(255), "applicantrace1" VARCHAR(255), "applicantrace2" VARCHAR(255), "applicantrace3" VARCHAR(255), "applicantrace4" VARCHAR(255), "applicantrace5" VARCHAR(255), "coapplicantrace1" VARCHAR(255), "coapplicantrace2" VARCHAR(255), "coapplicantrace3" VARCHAR(255), "coapplicantrace4" VARCHAR(255), "coapplicantrace5" VARCHAR(255), "applicantsex" DOUBLE PRECISION, "coapplicantsex" DOUBLE PRECISION, "applicantincome" VARCHAR(255), "purchasertype" VARCHAR(255), "denialreason1" VARCHAR(255), "denialreason2" VARCHAR(255), "denialreason3" VARCHAR(255), "ratespread" VARCHAR(255), "hoepastatus" VARCHAR(255), "lienstatus" VARCHAR(255), "editstatus" VARCHAR(255), "applicationdateindicator" DOUBLE PRECISION, "sequencenumber" DOUBLE PRECISION, "population" DOUBLE PRECISION, "minoritypopulationpct" DOUBLE PRECISION, "hudmedianfamilyincome" DOUBLE PRECISION, "tracttomsa_mdincomepct" DOUBLE PRECISION, "numberofowneroccupiedunits" DOUBLE PRECISION, "numberof1to4familyunits" DOUBLE PRECISION, "activityyear" DOUBLE PRECISION, "fedtaxid" VARCHAR(255), "respondentname_ts" VARCHAR(255), "respondentmailingaddress" VARCHAR(255), "respondentcity_ts" VARCHAR(255), "respondentstate_ts" VARCHAR(255), "respondentzipcode" VARCHAR(255), "parentname_ts" VARCHAR(255), "parentaddress" VARCHAR(255), "parentcity_ts" VARCHAR(255), "parentstate_ts" VARCHAR(255), "parentzipcode" VARCHAR(255), "respondentname_panel" VARCHAR(255), "respondentcity_panel" VARCHAR(255), "respondentstate_panel" VARCHAR(255), "assets_panel" DOUBLE PRECISION, "otherlendercode_panel" DOUBLE PRECISION, "regioncode_panel" DOUBLE PRECISION, "larcount" DOUBLE PRECISION, "validityerror" VARCHAR(255), "app_min_cnt" INTEGER, "co_min_cnt" INTEGER, "appwhite" INTEGER, "cowhite" INTEGER, "race" INTEGER, "ethnicity" VARCHAR(255))'))
})
# load data
ltime <- mtime(
for (t in tb) {
fname <- paste0(getwd(), "/data/", t, "_", d, ".csv")
if (inherits(con, "SQLiteConnection")) dbWriteTable(con, name = t, value = fname, row.names = FALSE, header = FALSE, append=T, sep="|", overwrite=F)
if (inherits(con, "MonetDBConnection")) dbSendQuery(con, paste0("COPY INTO ", t, " FROM '", fname, "'", collapse="\n"))
if (inherits(con, "PostgreSQLConnection")) dbGetQuery(con, paste0("COPY ", t, " FROM '", fname, "' WITH (FORMAT CSV, DELIMITER \'|\', NULL \'null\')", collapse="\n"))
})
trows <- dbGetQuery(con, paste0("SELECT SUM(rws) AS rws FROM (", paste0("SELECT COUNT(*) AS rws FROM ", tb, collapse=" UNION ALL "), ") AS t"))$rws[[1]]
# make sure we have loaded as many rows as expected
stopifnot(trows == ds[d])
dump <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
dump <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
qtime <- mtime(dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n")))
dump <- dbReadTable(con, "hmda_11")
dump <- dbReadTable(con, "hmda_11")
dtime <- mtime(dbReadTable(con, "hmda_11"))
# clean up
lapply(tb, function(x) dbRemoveTable(con, x))
results <<- rbind(results, data.frame(sys=sys, dataset=d, nrows=ds[d], ltime=ltime, qtime=qtime, dtime=dtime))
print(results)
write.table(results, "results.csv", row.names=F, quote=F, sep="\t", col.names=F)
}
}
run(con2, 'MonetDBLite')
run(con4, 'SQLite-file')
run(con1, 'MonetDB')