Changeset - 6da277f993c5
[Not reviewed]
0 20 0
Hannes Muehleisen - 9 years ago 2015-06-25 10:01:41
hannes@muehleisen.org
mc3
20 files changed with 32 insertions and 32 deletions:
0 comments (0 inline, 0 general)
defer.pdf
Show inline comments
 
binary diff not shown
identity-noopt.pdf
Show inline comments
 
binary diff not shown
identity-opt.pdf
Show inline comments
 
binary diff not shown
identity.pdf
Show inline comments
 
binary diff not shown
makedot.sh
Show inline comments
 
#!/bin/bash
# Renders the Graphviz .dot sources in this directory to PDF figures by
# running `dot` once per graph and redirecting its output to a .pdf file.
 

	
 
# Options shared by every dot call below: -Nfontsize=15 sets the default node
# font size, -Tpdf selects PDF output. $DOTOPTS is expanded unquoted on purpose
# so that the two options are passed to dot as separate arguments.
DOTOPTS="-Nfontsize=15 -Tpdf"
 

	
 
# Recycling graphs (noopt / opt variants).
dot recycling-noopt.dot $DOTOPTS > recycling-noopt.pdf 
 
dot recycling-opt.dot $DOTOPTS > recycling-opt.pdf 
 

	
 
# Identity graphs.
# NOTE(review): the next two commands write the same output file, so if both
# run, the second (with the graph attribute ranksep=0.3) overwrites the first.
# This listing appears to come from a diff view, so they may be the removed and
# added form of one line - confirm against the actual script.
dot identity-noopt.dot $DOTOPTS > identity-noopt.pdf 
 
dot identity-noopt.dot -Granksep=0.3 $DOTOPTS > identity-noopt.pdf 
 
dot identity-opt.dot $DOTOPTS > identity-opt.pdf 
 

	
 
# Pushdown graphs (noopt / opt variants).
dot pushdown-noopt.dot $DOTOPTS > pushdown-noopt.pdf 
 
dot pushdown-opt.dot $DOTOPTS > pushdown-opt.pdf 
 

	
 
# Note the output name differs from the input: parallel.dot -> parallel-tree.pdf.
dot parallel.dot $DOTOPTS > parallel-tree.pdf 
 

	
 
# Survey graphs (noopt / opt variants).
dot survey-noopt.dot $DOTOPTS > survey-noopt.pdf 
 
dot survey-opt.dot $DOTOPTS > survey-opt.pdf 
 

	
 

	
 
# Defer graph.
dot defer.dot $DOTOPTS > defer.pdf 
makeplots.R
Show inline comments
 
@@ -18,24 +18,51 @@ xlabels <- c(expression(10^5),expression(10^6),expression(10^7),expression(10^8)
 
# Shared plot styling that the plots in this script append with `+ theme`:
# theme_few() at base size 24 (presumably from the ggthemes package - confirm
# against the library() calls at the top of the file), nudged axis titles,
# a serif font for all text, and no legend (the plots label their lines with
# annotate() instead).
# The variable shares its name with the theme() function called on the second
# line; that call still works because R looks for a function object when
# resolving a call, skipping non-function bindings of the same name.
theme <- theme_few(base_size = 24) + 
 
theme(axis.title.y=element_text(vjust=0.9), 
 
	  axis.title.x=element_text(vjust=-0.1),
 
	  text=element_text(family="serif"),
 
	legend.position = "none"
 
    )
 

	
 

	
 
# Standard error of the mean of `x`: the square root of the sample variance
# divided by the number of observations. Used for the error bars in the plots.
se <- function(x) {
  n_obs <- length(x)
  sqrt(var(x) / n_obs)
}
 

	
 

	
 

	
 
# survey
 
# Aggregate the "survey" experiment: mean and standard error of `timesec` for
# each (sys, s, conf) group. `tool` is the "<sys> <conf>" label and `datasize`
# maps the dataset name in `s` to a fixed number (presumably the dataset's row
# count - TODO confirm). The final filter keeps every non-Renjin system but,
# for Renjin, only the "jitopt" configuration.
# NOTE(review): for a value of `s` other than the three names listed, switch()
# returns NULL and sapply() would then return a list, not a numeric vector.
d <- all %>% filter(exp=="survey") %>% group_by(sys, s, conf) %>% 
 
	summarize(meant=mean(timesec), se=se(timesec)) %>% 
 
	mutate(tool=paste(sys,conf), 
 
		datasize=sapply(s, switch, alabama=47512, california=1060060, acs3yr=9093077)) %>% 
 
	filter(tool == "Renjin jitopt" | sys != "Renjin")
 

	
 
# Show only the mean times, ordered by dataset, for inspection on the console.
print(d %>% select(sys,s,meant) %>% arrange(s))
 

	
 
# Error-bar bounds for geom_pointrange(): mean plus/minus one standard error.
limits <- aes(ymax = meant + se, ymin=meant - se, width=2)
 

	
 
# Everything drawn until dev.off() goes to survey.pdf (10 x 7).
pdf("survey.pdf",width=10,height=7)
 
# One line per tool over dataset size (log-scaled x axis with a tick at each of
# the three dataset sizes); lines are labelled by the annotate() calls because
# the shared theme hides the legend.
# NOTE(review): unlike the recycling and pushdown plots in this script, this
# plot is not assigned and print()ed. It is only drawn when the top-level value
# is auto-printed (e.g. under Rscript); source() with default settings would
# leave the PDF empty - confirm how the script is run.
ggplot(d,aes(datasize,meant,group=tool)) + 
 
  geom_point(size=4) + geom_line(size=1.5, aes(group=tool, linetype=tool)) +
 
  geom_pointrange(limits) +
 
  #scale_y_log10(breaks=ybreaks, labels=ylabels) +
 
    scale_x_log10(breaks=c(47512,1060060,9093077)) +
 

	
 
	xlab("Dataset Size (elements, log scale)") + ylab("Execution Time (s)") + theme +
 
	annotate("text", x=10^6.6, y=45, label="GNU R", family="serif", size=10)+
 
	annotate("text", x=10^6.4, y=100, label="sqlsurvey", family="serif", size=10)+
 
	annotate("text", x=10^6.8, y=15, label="Renjin", family="serif", size=10)
 

	
 
	#scale_color_brewer(palette=cBrwPl) +
 
 #guides(colour=guide_legend(keywidth=3.5))
 

	
 
# Close the PDF device so the file is flushed to disk.
dev.off()
 

	
 

	
 

	
 
# parallel
 

	
 

	
 
# Aggregate the "parallel" experiment at s == 8: mean and standard error of
# `timesec` per (exp, sys, conf). `conf` is read as a thread count, with "none"
# counted as a single thread; `tool` is the "<sys> <threads>" label, and rows
# with 32 or more threads are dropped.
# NOTE(review): ifelse() evaluates as.integer(conf) for every row, so "none"
# entries raise a coercion warning when conf is character; if conf is a factor,
# as.integer() yields level codes instead of the thread count - confirm its type.
d <- all %>% filter(exp=="parallel", s  == 8) %>% group_by(exp, sys, conf) %>% 
 
	summarize(meant=mean(timesec), se=se(timesec)) %>% 
 
	mutate(threads=ifelse(conf=="none", 1L, as.integer(conf))) %>% mutate(tool=paste(sys,threads)) %>% filter(threads < 32)
 

	
 
# Dump the aggregated table to the console for inspection.
print(d)
 

	
 
@@ -70,25 +97,24 @@ dev.off()
 
# p <- ggplot(d,aes(threads,speedup,group=s)) + 
 
#   geom_point(size=4) + geom_line(size=1.5, aes(group=s, linetype=sys)) +
 
#   scale_y_continuous(limits=c(0,NA)) +
 
#    scale_x_continuous(breaks=unique(d$threads)) +
 
# 	xlab("Number of Threads") + ylab("Speedup") + theme #+
 
# #	annotate("text", x=10, y=6, label="Problem parallelism", family="serif", size=10) #+
 
# 	# annotate("text", x=3.5, y=20, label="Renjin ", family="serif", size=10)
 

	
 
# print(p)
 
# dev.off()
 

	
 

	
 
stop()
 
# recycling
 
d <- all %>% filter(exp=="recycling", s > 5) %>% group_by(exp, sys, s, conf) %>% 
 
	summarize(meant=mean(timesec), se=se(timesec)) %>% 
 
	mutate(datasize=10^as.integer(s), tool=paste(sys, conf))
 

	
 
print(d)
 

	
 
limits <- aes(ymax = meant + se, ymin=meant - se, width=2)
 

	
 
pdf("recycling.pdf",width=10,height=7)
 
p <- ggplot(d,aes(datasize,meant,group=tool)) + 
 
  geom_point(size=4) + geom_line(size=1.5, aes(group=tool, linetype=tool)) +
 
@@ -96,51 +122,24 @@ p <- ggplot(d,aes(datasize,meant,group=tool)) +
 
#  scale_y_log10(breaks=ybreaks, labels=ylabels) +
 
    scale_x_log10(breaks=xbreaks, labels=xlabels) +
 

	
 
	xlab("Dataset Size (elements, log scale)") + ylab("Execution Time (s)") + theme +
 
	annotate("text", x=10^7.7, y=65, label="Renjin", family="serif", size=10)+
 
	annotate("text", x=10^7.7, y=2, label="Renjin + R.", family="serif", size=10)+
 
	annotate("text", x=10^7.8, y=21, label="GNU R", family="serif", size=10)
 

	
 

	
 
print(p)
 
dev.off()
 

	
 
# survey
 
d <- all %>% filter(exp=="survey") %>% group_by(sys, s, conf) %>% 
 
	summarize(meant=mean(timesec), se=se(timesec)) %>% 
 
	mutate(tool=paste(sys,conf), 
 
		datasize=sapply(s, switch, alabama=47512, california=1060060, acs3yr=9093077)) %>% 
 
	filter(tool == "Renjin jitopt" | sys != "Renjin")
 

	
 
print(d)
 

	
 
limits <- aes(ymax = meant + se, ymin=meant - se, width=2)
 

	
 
pdf("survey.pdf",width=10,height=7)
 
ggplot(d,aes(datasize,meant,group=tool)) + 
 
  geom_point(size=4) + geom_line(size=1.5, aes(group=tool, linetype=tool)) +
 
  geom_pointrange(limits) +
 
  #scale_y_log10(breaks=ybreaks, labels=ylabels) +
 
    scale_x_log10(breaks=c(47512,1060060,9093077)) +
 

	
 
	xlab("Dataset Size (elements, log scale)") + ylab("Execution Time (s)") + theme +
 
	annotate("text", x=10^6.6, y=45, label="GNU R", family="serif", size=10)+
 
	annotate("text", x=10^6.4, y=100, label="sqlsurvey", family="serif", size=10)+
 
	annotate("text", x=10^6.8, y=15, label="Renjin", family="serif", size=10)
 

	
 
	#scale_color_brewer(palette=cBrwPl) +
 
 #guides(colour=guide_legend(keywidth=3.5))
 

	
 
dev.off()
 

	
 

	
 
# identity
 
d <- all %>% filter(exp=="identity", s > 5) %>% group_by(exp, sys, conf, s) %>% 
 
	summarize(meant=mean(timesec), se=se(timesec)) %>% 
 
	mutate(tool=ifelse(conf=="none", sys, paste(sys,conf)), datasize=10^as.integer(s))
 

	
 
print(d)
 

	
 
limits <- aes(ymax = meant + se, ymin=meant - se, width=2)
 

	
 
pdf("identity.pdf",width=10,height=7)
 
@@ -154,41 +153,41 @@ ggplot(d,aes(datasize,meant,group=tool)) +
 
	annotate("text", x=60000000, y=4, label="GNU R", family="serif", size=10)+
 
	annotate("text", x=40000000, y=8.1, label="Renjin ", family="serif", size=10)+
 
	annotate("text", x=40000000, y=.6, label="Renjin + Identity", family="serif", size=10)
 

	
 
	#scale_color_brewer(palette=cBrwPl) +
 
 # guides(colour=guide_legend(keywidth=3.5))
 

	
 
dev.off()
 

	
 

	
 
# pushdown
 

	
 
d <- all %>% filter(exp=="pushdown", s > 4) %>% group_by(exp, sys, s) %>% 
 
d <- all %>% filter(exp=="pushdown", s > 5) %>% group_by(exp, sys, s) %>% 
 
	summarize(meant=mean(timesec), se=se(timesec)) %>% 
 
	mutate(tool=sys, datasize=10^as.integer(s))
 

	
 
print(d)
 

	
 
limits <- aes(ymax = meant + se, ymin=meant - se, width=2)
 

	
 
pdf("pushdown.pdf",width=10,height=7)
 
p <- ggplot(d,aes(datasize,meant,group=tool)) + 
 
  geom_point(size=4) + geom_line(size=1.5, aes(group=tool, linetype=tool)) +
 
  geom_pointrange(limits) +
 
 # scale_y_log10(breaks=ybreaks, labels=ylabels) +
 
    scale_x_log10(breaks=xbreaks, labels=xlabels) +
 

	
 
	xlab("Dataset Size (elements, log scale)") + ylab("Execution Time (s)") + theme +
 
	annotate("text", x=10^7.5, y=5, label="GNU R", family="serif", size=10)+
 
	annotate("text", x=10^7.6, y=5, label="GNU R", family="serif", size=10)+
 
	annotate("text", x=10^7.6, y=.4, label="Renjin ", family="serif", size=10)
 

	
 
print(p)
 
dev.off()
 

	
 

	
 

	
 

	
 
# operators
 
d <- all %>% filter(exp=="operators", s > 5) %>% group_by(exp, sys, s, conf) %>% 
 
	summarize(meant=mean(timesec), se=se(timesec)) %>% 
 
	mutate(conf=ifelse(conf=="opt"," + Vectorization",""), tool=paste(sys,conf,sep=""), datasize=10^as.integer(s))
operators.R
Show inline comments
 
@@ -12,12 +12,13 @@ for (s in 4:sm) {
 
	# Repeat the workload five times for the current size `s` and log the
	# elapsed time of each repetition. `a`, `x`, `sys` and `conf` are set
	# outside this excerpt.
	for (r in 1:5) {
 
		# [[3]] picks the third entry of system.time()'s result, the elapsed time.
		timing <- system.time({
 
			# Result buffer: one NA-initialised row per column of `a`.
			m <- matrix(data=as.numeric(NA), ncol=1, nrow=ncol(a))
 
			for(i in 1:ncol(a)){
 
				# Column sums of `x` weighted by column i of `a`, divided by the
				# sum of those weights. `m` has one column, so this presumably
				# expects `x` to have a single column - TODO confirm.
				m[i,]<-t(colSums(a[,i]*x)/sum(a[,i]))
 
			}
 
			# Printing happens inside the timed region, so its cost is part of
			# the measurement (presumably to force the result to be computed -
			# TODO confirm).
			print(m)
 
		})[[3]]
 
		# log.result() and clearResultRecycler() are defined elsewhere
		# (not visible in this excerpt).
		log.result("operators", sys, conf, s, r, timing)
 
		clearResultRecycler()
 
	}
 
}
 

	
operators.pdf
Show inline comments
 
binary diff not shown
parallel-tree.pdf
Show inline comments
 
binary diff not shown
parallel.pdf
Show inline comments
 
binary diff not shown
pushdown-noopt.pdf
Show inline comments
 
binary diff not shown
pushdown-opt.pdf
Show inline comments
 
binary diff not shown
pushdown.pdf
Show inline comments
 
binary diff not shown
recycling-noopt.pdf
Show inline comments
 
binary diff not shown
recycling-opt.pdf
Show inline comments
 
binary diff not shown
recycling.pdf
Show inline comments
 
binary diff not shown
sqlsurvey.R
Show inline comments
 
@@ -6,20 +6,20 @@ options(na.action="na.pass")
 
#options(monetdb.debug.query=T)
 

	
 
# Load the shared benchmark helpers; log.result() used below is presumably
# defined there - confirm.
source("harness.R")
 
# Benchmark sqlsurvey on each of the three tables in turn.
for (s in c("alabama", "california", "acs3yr")) { #, "acs3yr"
 
  # Build the replicate-weight survey design for table `s` (main weight column
  # "pwgtp", replicate weights "pwgtp1".."pwgtp80"). This happens once per
  # table, outside the timed region.
  # NOTE(review): database URL and credentials are hard-coded here.
  svydsgn <- sqlrepsurvey("pwgtp",paste("pwgtp",1:80,sep=""),
 
    scale=4/80,rscales=rep(1,80), mse=TRUE,
 
    database="monetdb://localhost/acs2", driver=MonetDB.R(),
 
    key="idkey",user="monetdb",password="monetdb",table.name=s, check.factors=data.frame())
 

	
 
  # Five timed repetitions per table.
  for (r in 1:5) {
 
    # Timed region: three svymean() calls (agep, relp, adjinc), each printed.
    # [[3]] picks the elapsed time from system.time()'s result.
    timing <- system.time({
 
      print(svymean(~agep, svydsgn, se=TRUE))
 
      print(svymean(~relp, svydsgn, se=TRUE))
 
      print(svymean(~adjinc, svydsgn, se=TRUE))
 
    })[[3]]
 
    # `conf` is not assigned in this excerpt; presumably set earlier in the
    # file or by harness.R - confirm.
    log.result("survey", "sqlsurvey", conf, s, r, timing)
 
  }
 
}
 

	
 
# TODO: test with Oct14 branch, probably faster
 
# TODO: Test on bricks?
 
\ No newline at end of file
survey-noopt.pdf
Show inline comments
 
binary diff not shown
survey-opt.pdf
Show inline comments
 
binary diff not shown
survey.pdf
Show inline comments
 
binary diff not shown
0 comments (0 inline, 0 general)