Files
@ da44a6058868
Branch filter:
Location: DA/monetdblite-experiments/testdriver.R - annotation
da44a6058868
4.5 KiB
text/S-plus
plots
0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a 0a35fe431f8a | library(MonetDB.R)
library(RSQLite)
library(RPostgreSQL)
con1 <- dbConnect(MonetDB.R(), "hmda")
con2 <- dbConnect(MonetDBLite())
con3 <- dbConnect(SQLite(), ":memory:")
con4 <- dbConnect(SQLite(), tempfile())
# PostgreSQL can't run queries
# con5 <- dbConnect(dbDriver("PostgreSQL"), host="localhost", port=5432)
ds <- c("1"=60000, "2"=600000, "3"=6000000, "4"=60000000, "f"=128818169)
tb <- sprintf("hmda_%02d", 6:11)
mtime <- function(x) system.time(x)[[3]]
results <- data.frame(sys=character(), dataset=character(), nrows=integer(), ltime=numeric(), qtime=numeric(), dtime=numeric())
run <- function(con, sys) {
for (d in names(ds)) {
# create tables
lapply(tb, function(t) {
if (dbExistsTable(con, t)) dbRemoveTable(con, t)
dbSendQuery(con, paste0('CREATE TABLE ', t, '( "asofyear" DOUBLE PRECISION, "respondentid" VARCHAR(255), "agencycode" VARCHAR(255), "loantype" DOUBLE PRECISION, "propertytype" VARCHAR(255), "loanpurpose" DOUBLE PRECISION, "occupancy" DOUBLE PRECISION, "loanamount" DOUBLE PRECISION, "preapproval" VARCHAR(255), "actiontype" DOUBLE PRECISION, "msa_md" VARCHAR(255), "statecode" VARCHAR(255), "countycode" VARCHAR(255), "censustractnumber" VARCHAR(255), "applicantethnicity" VARCHAR(255), "coapplicantethnicity" VARCHAR(255), "applicantrace1" VARCHAR(255), "applicantrace2" VARCHAR(255), "applicantrace3" VARCHAR(255), "applicantrace4" VARCHAR(255), "applicantrace5" VARCHAR(255), "coapplicantrace1" VARCHAR(255), "coapplicantrace2" VARCHAR(255), "coapplicantrace3" VARCHAR(255), "coapplicantrace4" VARCHAR(255), "coapplicantrace5" VARCHAR(255), "applicantsex" DOUBLE PRECISION, "coapplicantsex" DOUBLE PRECISION, "applicantincome" VARCHAR(255), "purchasertype" VARCHAR(255), "denialreason1" VARCHAR(255), "denialreason2" VARCHAR(255), "denialreason3" VARCHAR(255), "ratespread" VARCHAR(255), "hoepastatus" VARCHAR(255), "lienstatus" VARCHAR(255), "editstatus" VARCHAR(255), "applicationdateindicator" DOUBLE PRECISION, "sequencenumber" DOUBLE PRECISION, "population" DOUBLE PRECISION, "minoritypopulationpct" DOUBLE PRECISION, "hudmedianfamilyincome" DOUBLE PRECISION, "tracttomsa_mdincomepct" DOUBLE PRECISION, "numberofowneroccupiedunits" DOUBLE PRECISION, "numberof1to4familyunits" DOUBLE PRECISION, "activityyear" DOUBLE PRECISION, "fedtaxid" VARCHAR(255), "respondentname_ts" VARCHAR(255), "respondentmailingaddress" VARCHAR(255), "respondentcity_ts" VARCHAR(255), "respondentstate_ts" VARCHAR(255), "respondentzipcode" VARCHAR(255), "parentname_ts" VARCHAR(255), "parentaddress" VARCHAR(255), "parentcity_ts" VARCHAR(255), "parentstate_ts" VARCHAR(255), "parentzipcode" VARCHAR(255), "respondentname_panel" VARCHAR(255), "respondentcity_panel" VARCHAR(255), "respondentstate_panel" VARCHAR(255), "assets_panel" DOUBLE PRECISION, "otherlendercode_panel" DOUBLE PRECISION, "regioncode_panel" DOUBLE PRECISION, "larcount" DOUBLE PRECISION, "validityerror" VARCHAR(255), "app_min_cnt" INTEGER, "co_min_cnt" INTEGER, "appwhite" INTEGER, "cowhite" INTEGER, "race" INTEGER, "ethnicity" VARCHAR(255))'))
})
# load data
ltime <- mtime(
for (t in tb) {
fname <- paste0(getwd(), "/data/", t, "_", d, ".csv")
if (inherits(con, "SQLiteConnection")) dbWriteTable(con, name = t, value = fname, row.names = FALSE, header = FALSE, append=T, sep="|", overwrite=F)
if (inherits(con, "MonetDBConnection")) dbSendQuery(con, paste0("COPY INTO ", t, " FROM '", fname, "'", collapse="\n"))
if (inherits(con, "PostgreSQLConnection")) dbGetQuery(con, paste0("COPY ", t, " FROM '", fname, "' WITH (FORMAT CSV, DELIMITER \'|\', NULL \'null\')", collapse="\n"))
})
trows <- dbGetQuery(con, paste0("SELECT SUM(rws) AS rws FROM (", paste0("SELECT COUNT(*) AS rws FROM ", tb, collapse=" UNION ALL "), ") AS t"))$rws[[1]]
# make sure we have loaded as many rows as expected
stopifnot(trows == ds[d])
dump <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
dump <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
qtime <- mtime(dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n")))
dump <- dbReadTable(con, "hmda_11")
dump <- dbReadTable(con, "hmda_11")
dtime <- mtime(dbReadTable(con, "hmda_11"))
# clean up
lapply(tb, function(x) dbRemoveTable(con, x))
results <<- rbind(results, data.frame(sys=sys, dataset=d, nrows=ds[d], ltime=ltime, qtime=qtime, dtime=dtime))
print(results)
write.table(results, "results.csv", row.names=F, quote=F, sep="\t", col.names=F)
}
}
run(con2, 'MonetDBLite')
run(con4, 'SQLite-file')
run(con1, 'MonetDB')
|