Files @ 0a35fe431f8a
Branch filter:

Location: DA/monetdblite-experiments/testdriver.R

Hannes Muehleisen
Import
library(MonetDB.R)
library(RSQLite)
library(RPostgreSQL)

con1 <- dbConnect(MonetDB.R(), "hmda")
con2 <- dbConnect(MonetDBLite())
con3 <- dbConnect(SQLite(), ":memory:")
con4 <- dbConnect(SQLite(), tempfile())

# PostgreSQL can't run queries
# con5 <- dbConnect(dbDriver("PostgreSQL"), host="localhost", port=5432)

ds <- c("1"=60000, "2"=600000, "3"=6000000, "4"=60000000, "f"=128818169)
tb <- sprintf("hmda_%02d", 6:11)

mtime <- function(x) system.time(x)[[3]]

results <- data.frame(sys=character(), dataset=character(), nrows=integer(), ltime=numeric(), qtime=numeric(), dtime=numeric())

run <- function(con, sys) {
	for (d in names(ds)) {
		# create tables
		lapply(tb, function(t) {
			if (dbExistsTable(con, t)) dbRemoveTable(con, t)
			dbSendQuery(con, paste0('CREATE TABLE ', t, '( "asofyear" DOUBLE PRECISION, "respondentid" VARCHAR(255), "agencycode" VARCHAR(255), "loantype" DOUBLE PRECISION, "propertytype" VARCHAR(255), "loanpurpose" DOUBLE PRECISION, "occupancy" DOUBLE PRECISION, "loanamount" DOUBLE PRECISION, "preapproval" VARCHAR(255), "actiontype" DOUBLE PRECISION, "msa_md" VARCHAR(255), "statecode" VARCHAR(255), "countycode" VARCHAR(255), "censustractnumber" VARCHAR(255), "applicantethnicity" VARCHAR(255), "coapplicantethnicity" VARCHAR(255), "applicantrace1" VARCHAR(255), "applicantrace2" VARCHAR(255), "applicantrace3" VARCHAR(255), "applicantrace4" VARCHAR(255), "applicantrace5" VARCHAR(255), "coapplicantrace1" VARCHAR(255), "coapplicantrace2" VARCHAR(255), "coapplicantrace3" VARCHAR(255), "coapplicantrace4" VARCHAR(255), "coapplicantrace5" VARCHAR(255), "applicantsex" DOUBLE PRECISION, "coapplicantsex" DOUBLE PRECISION, "applicantincome" VARCHAR(255), "purchasertype" VARCHAR(255), "denialreason1" VARCHAR(255), "denialreason2" VARCHAR(255), "denialreason3" VARCHAR(255), "ratespread" VARCHAR(255), "hoepastatus" VARCHAR(255), "lienstatus" VARCHAR(255), "editstatus" VARCHAR(255), "applicationdateindicator" DOUBLE PRECISION, "sequencenumber" DOUBLE PRECISION, "population" DOUBLE PRECISION, "minoritypopulationpct" DOUBLE PRECISION, "hudmedianfamilyincome" DOUBLE PRECISION, "tracttomsa_mdincomepct" DOUBLE PRECISION, "numberofowneroccupiedunits" DOUBLE PRECISION, "numberof1to4familyunits" DOUBLE PRECISION, "activityyear" DOUBLE PRECISION, "fedtaxid" VARCHAR(255), "respondentname_ts" VARCHAR(255), "respondentmailingaddress" VARCHAR(255), "respondentcity_ts" VARCHAR(255), "respondentstate_ts" VARCHAR(255), "respondentzipcode" VARCHAR(255), "parentname_ts" VARCHAR(255), "parentaddress" VARCHAR(255), "parentcity_ts" VARCHAR(255), "parentstate_ts" VARCHAR(255), "parentzipcode" VARCHAR(255), "respondentname_panel" VARCHAR(255), "respondentcity_panel" VARCHAR(255), "respondentstate_panel" VARCHAR(255), "assets_panel" DOUBLE PRECISION, "otherlendercode_panel" DOUBLE PRECISION, "regioncode_panel" DOUBLE PRECISION, "larcount" DOUBLE PRECISION, "validityerror" VARCHAR(255), "app_min_cnt" INTEGER, "co_min_cnt" INTEGER, "appwhite" INTEGER, "cowhite" INTEGER, "race" INTEGER, "ethnicity" VARCHAR(255))'))
		})

		# load data
		ltime <- mtime(
		for (t in tb) {
			fname <- paste0(getwd(), "/data/", t, "_", d, ".csv")
			if (inherits(con, "SQLiteConnection")) dbWriteTable(con, name = t, value = fname, row.names = FALSE, header = FALSE, append=T, sep="|", overwrite=F)
			if (inherits(con, "MonetDBConnection"))  dbSendQuery(con, paste0("COPY INTO ", t, " FROM '", fname, "'", collapse="\n"))
			if (inherits(con, "PostgreSQLConnection"))  dbGetQuery(con, paste0("COPY ", t, " FROM '", fname, "' WITH (FORMAT CSV, DELIMITER \'|\', NULL \'null\')", collapse="\n"))
		})

		trows <- dbGetQuery(con, paste0("SELECT SUM(rws) AS rws FROM (", paste0("SELECT COUNT(*) AS rws FROM ", tb, collapse=" UNION ALL "), ") AS t"))$rws[[1]]

		# make sure we have loaded as many rows as expected
		stopifnot(trows == ds[d])

		dump  <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
		dump  <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
		qtime <- mtime(dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n")))
		dump  <- dbReadTable(con, "hmda_11")
		dump  <- dbReadTable(con, "hmda_11")
		dtime <- mtime(dbReadTable(con, "hmda_11"))

		# clean up
		lapply(tb, function(x) dbRemoveTable(con, x))

		results <<- rbind(results, data.frame(sys=sys, dataset=d, nrows=ds[d], ltime=ltime, qtime=qtime, dtime=dtime))
		print(results)
		write.table(results, "results.csv", row.names=F, quote=F, sep="\t", col.names=F)
	}
}
run(con2, 'MonetDBLite')
run(con4, 'SQLite-file')
run(con1, 'MonetDB')