Files @ da44a6058868
Branch filter:

Location: DA/monetdblite-experiments/testdriver.R

Hannes Muehleisen
plots
library(MonetDB.R)
library(RSQLite)
library(RPostgreSQL)

con1 <- dbConnect(MonetDB.R(), "hmda")
con2 <- dbConnect(MonetDBLite())
con3 <- dbConnect(SQLite(), ":memory:")
con4 <- dbConnect(SQLite(), tempfile())

# PostgreSQL can't run queries
# con5 <- dbConnect(dbDriver("PostgreSQL"), host="localhost", port=5432)

ds <- c("1"=60000, "2"=600000, "3"=6000000, "4"=60000000, "f"=128818169)
tb <- sprintf("hmda_%02d", 6:11)

mtime <- function(x) system.time(x)[[3]]

results <- data.frame(sys=character(), dataset=character(), nrows=integer(), ltime=numeric(), qtime=numeric(), dtime=numeric())

run <- function(con, sys) {
	for (d in names(ds)) {
		# create tables
		lapply(tb, function(t) {
			if (dbExistsTable(con, t)) dbRemoveTable(con, t)
			dbSendQuery(con, paste0('CREATE TABLE ', t, '( "asofyear" DOUBLE PRECISION, "respondentid" VARCHAR(255), "agencycode" VARCHAR(255), "loantype" DOUBLE PRECISION, "propertytype" VARCHAR(255), "loanpurpose" DOUBLE PRECISION, "occupancy" DOUBLE PRECISION, "loanamount" DOUBLE PRECISION, "preapproval" VARCHAR(255), "actiontype" DOUBLE PRECISION, "msa_md" VARCHAR(255), "statecode" VARCHAR(255), "countycode" VARCHAR(255), "censustractnumber" VARCHAR(255), "applicantethnicity" VARCHAR(255), "coapplicantethnicity" VARCHAR(255), "applicantrace1" VARCHAR(255), "applicantrace2" VARCHAR(255), "applicantrace3" VARCHAR(255), "applicantrace4" VARCHAR(255), "applicantrace5" VARCHAR(255), "coapplicantrace1" VARCHAR(255), "coapplicantrace2" VARCHAR(255), "coapplicantrace3" VARCHAR(255), "coapplicantrace4" VARCHAR(255), "coapplicantrace5" VARCHAR(255), "applicantsex" DOUBLE PRECISION, "coapplicantsex" DOUBLE PRECISION, "applicantincome" VARCHAR(255), "purchasertype" VARCHAR(255), "denialreason1" VARCHAR(255), "denialreason2" VARCHAR(255), "denialreason3" VARCHAR(255), "ratespread" VARCHAR(255), "hoepastatus" VARCHAR(255), "lienstatus" VARCHAR(255), "editstatus" VARCHAR(255), "applicationdateindicator" DOUBLE PRECISION, "sequencenumber" DOUBLE PRECISION, "population" DOUBLE PRECISION, "minoritypopulationpct" DOUBLE PRECISION, "hudmedianfamilyincome" DOUBLE PRECISION, "tracttomsa_mdincomepct" DOUBLE PRECISION, "numberofowneroccupiedunits" DOUBLE PRECISION, "numberof1to4familyunits" DOUBLE PRECISION, "activityyear" DOUBLE PRECISION, "fedtaxid" VARCHAR(255), "respondentname_ts" VARCHAR(255), "respondentmailingaddress" VARCHAR(255), "respondentcity_ts" VARCHAR(255), "respondentstate_ts" VARCHAR(255), "respondentzipcode" VARCHAR(255), "parentname_ts" VARCHAR(255), "parentaddress" VARCHAR(255), "parentcity_ts" VARCHAR(255), "parentstate_ts" VARCHAR(255), "parentzipcode" VARCHAR(255), "respondentname_panel" VARCHAR(255), "respondentcity_panel" VARCHAR(255), "respondentstate_panel" VARCHAR(255), "assets_panel" DOUBLE PRECISION, "otherlendercode_panel" DOUBLE PRECISION, "regioncode_panel" DOUBLE PRECISION, "larcount" DOUBLE PRECISION, "validityerror" VARCHAR(255), "app_min_cnt" INTEGER, "co_min_cnt" INTEGER, "appwhite" INTEGER, "cowhite" INTEGER, "race" INTEGER, "ethnicity" VARCHAR(255))'))
		})

		# load data
		ltime <- mtime(
		for (t in tb) {
			fname <- paste0(getwd(), "/data/", t, "_", d, ".csv")
			if (inherits(con, "SQLiteConnection")) dbWriteTable(con, name = t, value = fname, row.names = FALSE, header = FALSE, append=T, sep="|", overwrite=F)
			if (inherits(con, "MonetDBConnection"))  dbSendQuery(con, paste0("COPY INTO ", t, " FROM '", fname, "'", collapse="\n"))
			if (inherits(con, "PostgreSQLConnection"))  dbGetQuery(con, paste0("COPY ", t, " FROM '", fname, "' WITH (FORMAT CSV, DELIMITER \'|\', NULL \'null\')", collapse="\n"))
		})

		trows <- dbGetQuery(con, paste0("SELECT SUM(rws) AS rws FROM (", paste0("SELECT COUNT(*) AS rws FROM ", tb, collapse=" UNION ALL "), ") AS t"))$rws[[1]]

		# make sure we have loaded as many rows as expected
		stopifnot(trows == ds[d])

		dump  <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
		dump  <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
		qtime <- mtime(dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n")))
		dump  <- dbReadTable(con, "hmda_11")
		dump  <- dbReadTable(con, "hmda_11")
		dtime <- mtime(dbReadTable(con, "hmda_11"))

		# clean up
		lapply(tb, function(x) dbRemoveTable(con, x))

		results <<- rbind(results, data.frame(sys=sys, dataset=d, nrows=ds[d], ltime=ltime, qtime=qtime, dtime=dtime))
		print(results)
		write.table(results, "results.csv", row.names=F, quote=F, sep="\t", col.names=F)
	}
}
run(con2, 'MonetDBLite')
run(con4, 'SQLite-file')
run(con1, 'MonetDB')