File diff 000000000000 → 0a35fe431f8a
testdriver.R
Show inline comments
 
new file 100644
 
library(MonetDB.R)
 
library(RSQLite)
 
library(RPostgreSQL)
 

	
 
con1 <- dbConnect(MonetDB.R(), "hmda")
 
con2 <- dbConnect(MonetDBLite())
 
con3 <- dbConnect(SQLite(), ":memory:")
 
con4 <- dbConnect(SQLite(), tempfile())
 

	
 
# PostgreSQL can't run queries
 
# con5 <- dbConnect(dbDriver("PostgreSQL"), host="localhost", port=5432)
 

	
 
ds <- c("1"=60000, "2"=600000, "3"=6000000, "4"=60000000, "f"=128818169)
 
tb <- sprintf("hmda_%02d", 6:11)
 

	
 
mtime <- function(x) system.time(x)[[3]]
 

	
 
results <- data.frame(sys=character(), dataset=character(), nrows=integer(), ltime=numeric(), qtime=numeric(), dtime=numeric())
 

	
 
run <- function(con, sys) {
 
	for (d in names(ds)) {
 
		# create tables
 
		lapply(tb, function(t) {
 
			if (dbExistsTable(con, t)) dbRemoveTable(con, t)
 
			dbSendQuery(con, paste0('CREATE TABLE ', t, '( "asofyear" DOUBLE PRECISION, "respondentid" VARCHAR(255), "agencycode" VARCHAR(255), "loantype" DOUBLE PRECISION, "propertytype" VARCHAR(255), "loanpurpose" DOUBLE PRECISION, "occupancy" DOUBLE PRECISION, "loanamount" DOUBLE PRECISION, "preapproval" VARCHAR(255), "actiontype" DOUBLE PRECISION, "msa_md" VARCHAR(255), "statecode" VARCHAR(255), "countycode" VARCHAR(255), "censustractnumber" VARCHAR(255), "applicantethnicity" VARCHAR(255), "coapplicantethnicity" VARCHAR(255), "applicantrace1" VARCHAR(255), "applicantrace2" VARCHAR(255), "applicantrace3" VARCHAR(255), "applicantrace4" VARCHAR(255), "applicantrace5" VARCHAR(255), "coapplicantrace1" VARCHAR(255), "coapplicantrace2" VARCHAR(255), "coapplicantrace3" VARCHAR(255), "coapplicantrace4" VARCHAR(255), "coapplicantrace5" VARCHAR(255), "applicantsex" DOUBLE PRECISION, "coapplicantsex" DOUBLE PRECISION, "applicantincome" VARCHAR(255), "purchasertype" VARCHAR(255), "denialreason1" VARCHAR(255), "denialreason2" VARCHAR(255), "denialreason3" VARCHAR(255), "ratespread" VARCHAR(255), "hoepastatus" VARCHAR(255), "lienstatus" VARCHAR(255), "editstatus" VARCHAR(255), "applicationdateindicator" DOUBLE PRECISION, "sequencenumber" DOUBLE PRECISION, "population" DOUBLE PRECISION, "minoritypopulationpct" DOUBLE PRECISION, "hudmedianfamilyincome" DOUBLE PRECISION, "tracttomsa_mdincomepct" DOUBLE PRECISION, "numberofowneroccupiedunits" DOUBLE PRECISION, "numberof1to4familyunits" DOUBLE PRECISION, "activityyear" DOUBLE PRECISION, "fedtaxid" VARCHAR(255), "respondentname_ts" VARCHAR(255), "respondentmailingaddress" VARCHAR(255), "respondentcity_ts" VARCHAR(255), "respondentstate_ts" VARCHAR(255), "respondentzipcode" VARCHAR(255), "parentname_ts" VARCHAR(255), "parentaddress" VARCHAR(255), "parentcity_ts" VARCHAR(255), "parentstate_ts" VARCHAR(255), "parentzipcode" VARCHAR(255), "respondentname_panel" VARCHAR(255), "respondentcity_panel" VARCHAR(255), "respondentstate_panel" VARCHAR(255), "assets_panel" DOUBLE PRECISION, "otherlendercode_panel" DOUBLE PRECISION, "regioncode_panel" DOUBLE PRECISION, "larcount" DOUBLE PRECISION, "validityerror" VARCHAR(255), "app_min_cnt" INTEGER, "co_min_cnt" INTEGER, "appwhite" INTEGER, "cowhite" INTEGER, "race" INTEGER, "ethnicity" VARCHAR(255))'))
 
		})
 

	
 
		# load data
 
		ltime <- mtime(
 
		for (t in tb) {
 
			fname <- paste0(getwd(), "/data/", t, "_", d, ".csv")
 
			if (inherits(con, "SQLiteConnection")) dbWriteTable(con, name = t, value = fname, row.names = FALSE, header = FALSE, append=T, sep="|", overwrite=F)
 
			if (inherits(con, "MonetDBConnection"))  dbSendQuery(con, paste0("COPY INTO ", t, " FROM '", fname, "'", collapse="\n"))
 
			if (inherits(con, "PostgreSQLConnection"))  dbGetQuery(con, paste0("COPY ", t, " FROM '", fname, "' WITH (FORMAT CSV, DELIMITER \'|\', NULL \'null\')", collapse="\n"))
 
		})
 

	
 
		trows <- dbGetQuery(con, paste0("SELECT SUM(rws) AS rws FROM (", paste0("SELECT COUNT(*) AS rws FROM ", tb, collapse=" UNION ALL "), ") AS t"))$rws[[1]]
 

	
 
		# make sure we have loaded as many rows as expected
 
		stopifnot(trows == ds[d])
 

	
 
		dump  <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
 
		dump  <- dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n"))
 
		qtime <- mtime(dbGetQuery(con, paste0(readLines("hmda-queries.sql"), collapse="\n")))
 
		dump  <- dbReadTable(con, "hmda_11")
 
		dump  <- dbReadTable(con, "hmda_11")
 
		dtime <- mtime(dbReadTable(con, "hmda_11"))
 

	
 
		# clean up
 
		lapply(tb, function(x) dbRemoveTable(con, x))
 

	
 
		results <<- rbind(results, data.frame(sys=sys, dataset=d, nrows=ds[d], ltime=ltime, qtime=qtime, dtime=dtime))
 
		print(results)
 
		write.table(results, "results.csv", row.names=F, quote=F, sep="\t", col.names=F)
 
	}
 
}
 
run(con2, 'MonetDBLite')
 
run(con4, 'SQLite-file')
 
run(con1, 'MonetDB')